2 changes: 2 additions & 0 deletions config/config.exs
@@ -25,6 +25,8 @@ config :logflare, Logflare.Google, dataset_id_append: "_default"
config :logflare, :postgres_backend_adapter, pool_size: 3
config :logflare, :clickhouse_backend_adapter, pool_size: 3

config :logflare, :bigquery_backend_adaptor, managed_service_account_pool_size: 0

config :logflare, Logflare.Source.BigQuery.Schema, updates_per_minute: 6

# Configures the endpoint
9 changes: 9 additions & 0 deletions config/runtime.exs
@@ -44,6 +44,15 @@ config :logflare,
  ]
  |> filter_nil_kv_pairs.()

config :logflare,
  :bigquery_backend_adaptor,
  [
    managed_service_account_pool_size:
      System.get_env("LOGFLARE_BIGQUERY_MANAGED_SA_POOL", "0")
      |> String.to_integer()
  ]
  |> filter_nil_kv_pairs.()

config :logflare,
  Logflare.Alerting,
  [
45 changes: 25 additions & 20 deletions docs/docs.logflare.com/docs/self-hosting/index.md
@@ -85,11 +85,26 @@ Setting `LOGFLARE_METADATA_CLUSTER=production` will result in the following payload

### BigQuery Backend Configuration

| Env Var | Type | Description |
| -------------------------- | --------------------------- | ------------------------------------------------------------- |
| `GOOGLE_PROJECT_ID` | string, required | Specifies the GCP project to use. |
| `GOOGLE_PROJECT_NUMBER` | string, required | Specifies the GCP project to use. |
| `GOOGLE_DATASET_ID_APPEND` | string, defaults to `_prod` | This allows customization of the dataset created in BigQuery. |
| Env Var | Type | Description |
| ----------------------------------- | --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `GOOGLE_PROJECT_ID` | string, required | Specifies the GCP project to use. |
| `GOOGLE_PROJECT_NUMBER`             | string, required            | Specifies the project number of the GCP project to use. |
| `GOOGLE_DATASET_ID_APPEND` | string, defaults to `_prod` | This allows customization of the dataset created in BigQuery. |
| `LOGFLARE_BIGQUERY_MANAGED_SA_POOL` | integer, defaults to `0`    | Sets the number of managed service accounts to create for BigQuery API operations. When set to `0`, managed service accounts are disabled and all queries run through the main service account. |

#### Managed Service Accounts

When `LOGFLARE_BIGQUERY_MANAGED_SA_POOL` is set to a non-zero value, Logflare uses service account impersonation when making requests against the BigQuery REST API. Increase this value if you are experiencing rate limiting.
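
For example, to run a pool of five managed service accounts, set `LOGFLARE_BIGQUERY_MANAGED_SA_POOL=5` in the server's environment. This maps onto the application config introduced in `runtime.exs` above (the value `5` is only an illustration):

```elixir
config :logflare, :bigquery_backend_adaptor,
  managed_service_account_pool_size: 5
```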

BigQuery enforces a fixed limit of 100 requests per second per user on its core REST API. Service account impersonation spreads requests across multiple service accounts, avoiding this limit.
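
As a rough sketch of the idea, each request can fetch its token from one of several per-account Goth processes, so no single identity absorbs all of the traffic. The process naming and random selection below are illustrative assumptions, not the actual `BigQueryAdaptor` implementation:

```elixir
defmodule MyApp.TokenPool do
  # Hypothetical helper: spread token fetches across a pool of managed
  # service accounts so that no single user identity exceeds BigQuery's
  # 100 requests-per-second limit.
  #
  # Assumes one Goth process per managed account, registered under the
  # illustrative name {:managed_goth, index}.
  def fetch_token(pool_size) when pool_size > 0 do
    index = :rand.uniform(pool_size) - 1
    Goth.fetch({:managed_goth, index})
  end
end
```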

Managed service accounts are provisioned automatically by the server, so the main service account requires these additional roles:

- `roles/resourcemanager.projectIamAdmin`
- `roles/iam.serviceAccountCreator`
- `roles/iam.serviceAccountTokenCreator`

Without these additional roles, the managed service accounts feature will not work.

### PostgreSQL Backend Configuration

@@ -135,21 +150,11 @@ The requirements for server startup are as follows after creating the project:

To ensure that you have sufficient permissions to insert into Google Cloud BigQuery, create a service account with either:

- BigQuery Admin role; or
- The following permissions:
  - bigquery.datasets.create
  - bigquery.datasets.get
  - bigquery.datasets.getIamPolicy
  - bigquery.datasets.update
  - bigquery.jobs.create
  - bigquery.routines.create
  - bigquery.routines.update
  - bigquery.tables.create
  - bigquery.tables.delete
  - bigquery.tables.get
  - bigquery.tables.getData
  - bigquery.tables.update
  - bigquery.tables.updateData
- `roles/bigquery.admin`
- for [managed service accounts](#managed-service-accounts):
  - `roles/resourcemanager.projectIamAdmin`
  - `roles/iam.serviceAccountCreator`
  - `roles/iam.serviceAccountTokenCreator`

We recommend granting the BigQuery Admin role, as it simplifies permissions setup.

73 changes: 11 additions & 62 deletions lib/logflare/application.ex
@@ -3,6 +3,7 @@ defmodule Logflare.Application do
  use Application
  require Logger

  alias Logflare.Backends.Adaptor.BigQueryAdaptor
  alias Logflare.ContextCache
  alias Logflare.Logs
  alias Logflare.SingleTenant
@@ -123,58 +124,13 @@ defmodule Logflare.Application do
    ]
  end

  def goth_partition_count, do: 5

  def conditional_children do
    goth =
      case Application.get_env(:goth, :json) do
        nil ->
          []

        json ->
          # Setup Goth for GCP connections
          credentials = Jason.decode!(json)
          scopes = ["https://www.googleapis.com/auth/cloud-platform"]
          source = {:service_account, credentials, scopes: scopes}

          spec =
            {
              Goth,
              # https://hexdocs.pm/goth/Goth.html#fetch/2
              # refresh 15 min before
              # don't start server until fetch is made
              # cap retries at 10s, warn when >5
              name: Logflare.Goth,
              source: source,
              refresh_before: 60 * 15,
              prefetch: :sync,
              http_client: &goth_finch_http_client/1,
              retry_delay: fn
                n when n < 3 ->
                  1000

                n when n < 5 ->
                  Logger.warning("Goth refresh retry count is #{n}")
                  1000 * 3

                n when n < 10 ->
                  Logger.warning("Goth refresh retry count is #{n}")
                  1000 * 5

                n ->
                  Logger.warning("Goth refresh retry count is #{n}")
                  1000 * 10
              end
            }

          # Partition Goth
          [
            {PartitionSupervisor,
             child_spec: spec,
             name: Logflare.GothPartitionSup,
             with_arguments: fn [opts], partition ->
               [Keyword.put(opts, :name, {Logflare.Goth, partition})]
             end}
          ]
      end
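      # Goth supervision now lives in BigQueryAdaptor: a partitioned Goth
      # pool for the main service account, plus one Goth process per
      # managed (impersonated) service account.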
      [
        BigQueryAdaptor.partitioned_goth_child_spec()
      ] ++ BigQueryAdaptor.impersonated_goth_child_specs()

    # only add in config cat to multi-tenant prod
    config_cat =
@@ -191,18 +147,6 @@
    :ok
  end

  # tell goth to use our finch pool
  # https://github.com/peburrows/goth/blob/master/lib/goth/token.ex#L144
  defp goth_finch_http_client(options) do
    {method, options} = Keyword.pop!(options, :method)
    {url, options} = Keyword.pop!(options, :url)
    {headers, options} = Keyword.pop!(options, :headers)
    {body, options} = Keyword.pop!(options, :body)

    Finch.build(method, url, headers, body)
    |> Finch.request(Logflare.FinchGoth, options)
  end

  defp finch_pools do
    base = System.schedulers_online()
    min_count = max(5, ceil(base / 10))
@@ -278,6 +222,11 @@
    # if single tenant, insert enterprise user
    Logger.info("Executing startup tasks")

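    # Provision the managed service-account pool and sync the project IAM
    # policy before backends start; skipped on Postgres-only deployments,
    # which do not use BigQuery.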
    if !SingleTenant.postgres_backend?() do
      BigQueryAdaptor.create_managed_service_accounts()
      BigQueryAdaptor.update_iam_policy()
    end

    if SingleTenant.single_tenant?() do
      Logger.info("Ensuring single tenant user is seeded...")
      SingleTenant.create_default_plan()