From f17bad660d2799cb0d1b1705b5e3e3ab66946541 Mon Sep 17 00:00:00 2001 From: Joe <1264204425@qq.com> Date: Fri, 28 Jun 2024 15:01:08 +0800 Subject: [PATCH 01/25] feat: dataset_operator dataset visibility --- api/models/account.py | 4 +++ api/models/dataset.py | 1 - api/services/dataset_service.py | 50 ++++++++++++++++++--------------- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/api/models/account.py b/api/models/account.py index 31b79fe9a7d6e1..784c760df3b3b4 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -80,6 +80,10 @@ def current_tenant_id(self, value): self._current_tenant = tenant + @property + def current_role(self): + return self._current_tenant.current_role + def get_status(self) -> AccountStatus: status_str = self.status return AccountStatus(status_str) diff --git a/api/models/dataset.py b/api/models/dataset.py index 7100b987cd3520..7c8a871aea1dc5 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -676,6 +676,5 @@ class DatasetPermission(db.Model): id = db.Column(StringUUID, server_default=db.text('uuid_generate_v4()'), primary_key=True) dataset_id = db.Column(StringUUID, nullable=False) account_id = db.Column(StringUUID, nullable=False) - account_role = db.Column(db.String(255), nullable=False, server_default=db.text("'normal'::character varying")) has_permission = db.Column(db.Boolean, nullable=False, server_default=db.text('true')) created_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)')) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 97a7e74744b749..0ade170130ef4a 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -57,35 +57,25 @@ class DatasetService: @staticmethod def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, search=None, tag_ids=None): - # current user identity is the database manager, and the knowledge base permissions are all - current_user_role = current_user._current_tenant.current_role - if current_user_role == TenantAccountRole.DATASET_OPERATOR: - # through the dataset_permission table to find the dataset_id that the current user has permission to - dataset_permission = DatasetPermission.query.filter_by(account_id=current_user.id).all() - dataset_ids = [dp.dataset_id for dp in dataset_permission] - # query the dataset table to get the dataset information - query = Dataset.query.filter( - db.and_(Dataset.provider == provider, Dataset.tenant_id == tenant_id, Dataset.id.in_(dataset_ids)) - ) \ - .order_by(Dataset.created_at.desc()) - datasets = query.paginate( - page=page, - per_page=per_page, - max_per_page=100, - error_out=False - ) - return datasets.items, datasets.total - if user: - permission_filter = db.or_(Dataset.created_by == user.id, - Dataset.permission == 'all_team_members', - Dataset.permission == 'partial_members' - ) + if user.current_role == TenantAccountRole.DATASET_OPERATOR: + dataset_permission = DatasetPermission.query.filter_by(account_id=user.id).all() + if dataset_permission: + dataset_ids = [dp.dataset_id for dp in dataset_permission] + + return DatasetService.get_datasets_by_ids(dataset_ids, tenant_id) + permission_filter = db.false() + else: + permission_filter = db.or_( + Dataset.created_by == user.id, + Dataset.permission == 'all_team_members', + ) else: permission_filter = Dataset.permission == 'all_team_members' query = Dataset.query.filter( db.and_(Dataset.provider == provider, Dataset.tenant_id == tenant_id, permission_filter)) \ 
.order_by(Dataset.created_at.desc()) + if search: query = query.filter(db.and_(Dataset.name.ilike(f'%{search}%'))) if tag_ids: @@ -101,6 +91,11 @@ def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, s error_out=False ) + # check datasets permission + datasets.items, datasets.total = DatasetService.filter_datasets_by_permission( + user, datasets.items + ) + return datasets.items, datasets.total @staticmethod @@ -312,6 +307,15 @@ def get_related_apps(dataset_id: str): return AppDatasetJoin.query.filter(AppDatasetJoin.dataset_id == dataset_id) \ .order_by(db.desc(AppDatasetJoin.created_at)).all() + @staticmethod + def filter_datasets_by_permission(user, datasets): + # 检查datasets 如果为部分成员可见,检查是否有权限 + dataset_permission = DatasetPermission.query.filter_by(account_id=user.id).all() + if dataset_permission: + dataset_ids = [dp.dataset_id for dp in dataset_permission] + if dataset_ids: + return DatasetService.get_datasets_by_ids(dataset_ids, user.current_tenant_id) + return [], 0 class DocumentService: DEFAULT_RULES = { From 2ad7ea1122315d5281d7546b09f48a61bee82bbe Mon Sep 17 00:00:00 2001 From: Joe <1264204425@qq.com> Date: Fri, 28 Jun 2024 16:51:11 +0800 Subject: [PATCH 02/25] feat: update filter datasets by permission --- api/services/dataset_service.py | 217 +++++++++++++++++++------------- 1 file changed, 133 insertions(+), 84 deletions(-) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 0ade170130ef4a..ca494c4dcff2fd 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -57,44 +57,49 @@ class DatasetService: @staticmethod def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, search=None, tag_ids=None): + query = Dataset.query.filter(Dataset.provider == provider, Dataset.tenant_id == tenant_id) + if user: if user.current_role == TenantAccountRole.DATASET_OPERATOR: dataset_permission = DatasetPermission.query.filter_by(account_id=user.id).all() if dataset_permission: dataset_ids = [dp.dataset_id for dp in dataset_permission] - - return DatasetService.get_datasets_by_ids(dataset_ids, tenant_id) - permission_filter = db.false() + query = query.filter(Dataset.id.in_(dataset_ids)) + else: + query = query.filter(db.false()) else: permission_filter = db.or_( Dataset.created_by == user.id, Dataset.permission == 'all_team_members', + Dataset.permission == 'partial_members' ) + query = query.filter(permission_filter) else: permission_filter = Dataset.permission == 'all_team_members' - query = Dataset.query.filter( - db.and_(Dataset.provider == provider, Dataset.tenant_id == tenant_id, permission_filter)) \ - .order_by(Dataset.created_at.desc()) + query = query.filter(permission_filter) if search: - query = query.filter(db.and_(Dataset.name.ilike(f'%{search}%'))) + query = query.filter(Dataset.name.ilike(f'%{search}%')) + if tag_ids: target_ids = TagService.get_target_ids_by_tag_ids('knowledge', tenant_id, tag_ids) if target_ids: - query = query.filter(db.and_(Dataset.id.in_(target_ids))) + query = query.filter(Dataset.id.in_(target_ids)) else: return [], 0 + datasets = query.paginate( page=page, per_page=per_page, max_per_page=100, error_out=False ) - - # check datasets permission - datasets.items, datasets.total = DatasetService.filter_datasets_by_permission( - user, datasets.items - ) + print("================第一次", datasets.items, datasets.total) + # check datasets permission, assuming this function exists + if user and user.current_role != TenantAccountRole.DATASET_OPERATOR: + 
datasets.items, datasets.total = DatasetService.filter_datasets_by_permission( + user, datasets + ) return datasets.items, datasets.total @@ -119,9 +124,12 @@ def get_process_rules(dataset_id): @staticmethod def get_datasets_by_ids(ids, tenant_id): - datasets = Dataset.query.filter(Dataset.id.in_(ids), - Dataset.tenant_id == tenant_id).paginate( - page=1, per_page=len(ids), max_per_page=len(ids), error_out=False) + datasets = Dataset.query.filter( + Dataset.id.in_(ids), + Dataset.tenant_id == tenant_id + ).paginate( + page=1, per_page=len(ids), max_per_page=len(ids), error_out=False + ) return datasets.items, datasets.total @staticmethod @@ -129,7 +137,8 @@ def create_empty_dataset(tenant_id: str, name: str, indexing_technique: Optional # check if dataset name already exists if Dataset.query.filter_by(name=name, tenant_id=tenant_id).first(): raise DatasetNameDuplicateError( - f'Dataset with name {name} already exists.') + f'Dataset with name {name} already exists.' + ) embedding_model = None if indexing_technique == 'high_quality': model_manager = ModelManager() @@ -168,10 +177,13 @@ def check_dataset_model_setting(dataset): except LLMBadRequestError: raise ValueError( "No Embedding Model available. Please configure a valid provider " - "in the Settings -> Model Provider.") + "in the Settings -> Model Provider." + ) except ProviderTokenNotInitError as ex: - raise ValueError(f"The dataset in unavailable, due to: " - f"{ex.description}") + raise ValueError( + f"The dataset in unavailable, due to: " + f"{ex.description}" + ) @staticmethod def update_dataset(dataset_id, data, user): @@ -208,12 +220,13 @@ def update_dataset(dataset_id, data, user): except LLMBadRequestError: raise ValueError( "No Embedding Model available. Please configure a valid provider " - "in the Settings -> Model Provider.") + "in the Settings -> Model Provider." + ) except ProviderTokenNotInitError as ex: raise ValueError(ex.description) else: if data['embedding_model_provider'] != dataset.embedding_model_provider or \ - data['embedding_model'] != dataset.embedding_model: + data['embedding_model'] != dataset.embedding_model: action = 'update' try: model_manager = ModelManager() @@ -233,7 +246,8 @@ def update_dataset(dataset_id, data, user): except LLMBadRequestError: raise ValueError( "No Embedding Model available. Please configure a valid provider " - "in the Settings -> Model Provider.") + "in the Settings -> Model Provider." + ) except ProviderTokenNotInitError as ex: raise ValueError(ex.description) @@ -273,14 +287,18 @@ def delete_dataset(dataset_id, user): def check_dataset_permission(dataset, user): if dataset.tenant_id != user.current_tenant_id: logging.debug( - f'User {user.id} does not have permission to access dataset {dataset.id}') + f'User {user.id} does not have permission to access dataset {dataset.id}' + ) raise NoPermissionError( - 'You do not have permission to access this dataset.') + 'You do not have permission to access this dataset.' + ) if dataset.permission == 'only_me' and dataset.created_by != user.id: logging.debug( - f'User {user.id} does not have permission to access dataset {dataset.id}') + f'User {user.id} does not have permission to access dataset {dataset.id}' + ) raise NoPermissionError( - 'You do not have permission to access this dataset.') + 'You do not have permission to access this dataset.' 
+ ) if dataset.permission == 'partial_members': user_permission = DatasetPermission.query.filter_by( dataset_id=dataset.id, account_id=user.id @@ -309,14 +327,20 @@ def get_related_apps(dataset_id: str): @staticmethod def filter_datasets_by_permission(user, datasets): - # 检查datasets 如果为部分成员可见,检查是否有权限 dataset_permission = DatasetPermission.query.filter_by(account_id=user.id).all() if dataset_permission: - dataset_ids = [dp.dataset_id for dp in dataset_permission] - if dataset_ids: - return DatasetService.get_datasets_by_ids(dataset_ids, user.current_tenant_id) + permitted_dataset_ids = {dp.dataset_id for dp in dataset_permission} + filtered_datasets = [dataset for dataset in datasets if + dataset.permission == 'all_team_members' or dataset.permission == 'only_me' or dataset.id in permitted_dataset_ids] + if filtered_datasets: + return filtered_datasets, len(filtered_datasets) + else: + all_members_visible_datasets = [dataset for dataset in datasets if dataset.permission == 'all_team_members'] + return all_members_visible_datasets, len(all_members_visible_datasets) + return [], 0 + class DocumentService: DEFAULT_RULES = { 'mode': 'custom', @@ -581,6 +605,7 @@ def sync_website_document(dataset_id: str, document: Document): redis_client.setex(sync_indexing_cache_key, 600, 1) sync_website_document_indexing_task.delay(dataset_id, document.id) + @staticmethod def get_documents_position(dataset_id): document = Document.query.filter_by(dataset_id=dataset_id).order_by(Document.position.desc()).first() @@ -590,9 +615,11 @@ def get_documents_position(dataset_id): return 1 @staticmethod - def save_document_with_dataset_id(dataset: Dataset, document_data: dict, - account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None, - created_from: str = 'web'): + def save_document_with_dataset_id( + dataset: Dataset, document_data: dict, + account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None, + created_from: str = 'web' + ): # check document limit features = FeatureService.get_features(current_user.current_tenant_id) @@ -622,7 +649,7 @@ def save_document_with_dataset_id(dataset: Dataset, document_data: dict, if not dataset.indexing_technique: if 'indexing_technique' not in document_data \ - or document_data['indexing_technique'] not in Dataset.INDEXING_TECHNIQUE_LIST: + or document_data['indexing_technique'] not in Dataset.INDEXING_TECHNIQUE_LIST: raise ValueError("Indexing technique is required") dataset.indexing_technique = document_data["indexing_technique"] @@ -652,7 +679,8 @@ def save_document_with_dataset_id(dataset: Dataset, document_data: dict, } dataset.retrieval_model = document_data.get('retrieval_model') if document_data.get( - 'retrieval_model') else default_retrieval_model + 'retrieval_model' + ) else default_retrieval_model documents = [] batch = time.strftime('%Y%m%d%H%M%S') + str(random.randint(100000, 999999)) @@ -720,12 +748,14 @@ def save_document_with_dataset_id(dataset: Dataset, document_data: dict, documents.append(document) duplicate_document_ids.append(document.id) continue - document = DocumentService.build_document(dataset, dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], - data_source_info, created_from, position, - account, file_name, batch) + document = DocumentService.build_document( + dataset, dataset_process_rule.id, + document_data["data_source"]["type"], + document_data["doc_form"], + document_data["doc_language"], + data_source_info, created_from, position, + 
account, file_name, batch + ) db.session.add(document) db.session.flush() document_ids.append(document.id) @@ -766,12 +796,14 @@ def save_document_with_dataset_id(dataset: Dataset, document_data: dict, "notion_page_icon": page['page_icon'], "type": page['type'] } - document = DocumentService.build_document(dataset, dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], - data_source_info, created_from, position, - account, page['page_name'], batch) + document = DocumentService.build_document( + dataset, dataset_process_rule.id, + document_data["data_source"]["type"], + document_data["doc_form"], + document_data["doc_language"], + data_source_info, created_from, position, + account, page['page_name'], batch + ) db.session.add(document) db.session.flush() document_ids.append(document.id) @@ -793,12 +825,14 @@ def save_document_with_dataset_id(dataset: Dataset, document_data: dict, 'only_main_content': website_info.get('only_main_content', False), 'mode': 'crawl', } - document = DocumentService.build_document(dataset, dataset_process_rule.id, - document_data["data_source"]["type"], - document_data["doc_form"], - document_data["doc_language"], - data_source_info, created_from, position, - account, url, batch) + document = DocumentService.build_document( + dataset, dataset_process_rule.id, + document_data["data_source"]["type"], + document_data["doc_form"], + document_data["doc_language"], + data_source_info, created_from, position, + account, url, batch + ) db.session.add(document) db.session.flush() document_ids.append(document.id) @@ -819,13 +853,16 @@ def check_documents_upload_quota(count: int, features: FeatureModel): can_upload_size = features.documents_upload_quota.limit - features.documents_upload_quota.size if count > can_upload_size: raise ValueError( - f'You have reached the limit of your subscription. Only {can_upload_size} documents can be uploaded.') + f'You have reached the limit of your subscription. Only {can_upload_size} documents can be uploaded.' 
+ ) @staticmethod - def build_document(dataset: Dataset, process_rule_id: str, data_source_type: str, document_form: str, - document_language: str, data_source_info: dict, created_from: str, position: int, - account: Account, - name: str, batch: str): + def build_document( + dataset: Dataset, process_rule_id: str, data_source_type: str, document_form: str, + document_language: str, data_source_info: dict, created_from: str, position: int, + account: Account, + name: str, batch: str + ): document = Document( tenant_id=dataset.tenant_id, dataset_id=dataset.id, @@ -844,16 +881,20 @@ def build_document(dataset: Dataset, process_rule_id: str, data_source_type: str @staticmethod def get_tenant_documents_count(): - documents_count = Document.query.filter(Document.completed_at.isnot(None), - Document.enabled == True, - Document.archived == False, - Document.tenant_id == current_user.current_tenant_id).count() + documents_count = Document.query.filter( + Document.completed_at.isnot(None), + Document.enabled == True, + Document.archived == False, + Document.tenant_id == current_user.current_tenant_id + ).count() return documents_count @staticmethod - def update_document_with_dataset_id(dataset: Dataset, document_data: dict, - account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None, - created_from: str = 'web'): + def update_document_with_dataset_id( + dataset: Dataset, document_data: dict, + account: Account, dataset_process_rule: Optional[DatasetProcessRule] = None, + created_from: str = 'web' + ): DatasetService.check_dataset_model_setting(dataset) document = DocumentService.get_document(dataset.id, document_data["original_document_id"]) if document.display_status != 'available': @@ -1041,7 +1082,7 @@ def document_create_args_validate(cls, args: dict): DocumentService.process_rule_args_validate(args) else: if ('data_source' not in args and not args['data_source']) \ - and ('process_rule' not in args and not args['process_rule']): + and ('process_rule' not in args and not args['process_rule']): raise ValueError("Data source or Process rule is required") else: if args.get('data_source'): @@ -1103,7 +1144,7 @@ def process_rule_args_validate(cls, args: dict): raise ValueError("Process rule rules is invalid") if 'pre_processing_rules' not in args['process_rule']['rules'] \ - or args['process_rule']['rules']['pre_processing_rules'] is None: + or args['process_rule']['rules']['pre_processing_rules'] is None: raise ValueError("Process rule pre_processing_rules is required") if not isinstance(args['process_rule']['rules']['pre_processing_rules'], list): @@ -1128,21 +1169,21 @@ def process_rule_args_validate(cls, args: dict): args['process_rule']['rules']['pre_processing_rules'] = list(unique_pre_processing_rule_dicts.values()) if 'segmentation' not in args['process_rule']['rules'] \ - or args['process_rule']['rules']['segmentation'] is None: + or args['process_rule']['rules']['segmentation'] is None: raise ValueError("Process rule segmentation is required") if not isinstance(args['process_rule']['rules']['segmentation'], dict): raise ValueError("Process rule segmentation is invalid") if 'separator' not in args['process_rule']['rules']['segmentation'] \ - or not args['process_rule']['rules']['segmentation']['separator']: + or not args['process_rule']['rules']['segmentation']['separator']: raise ValueError("Process rule segmentation separator is required") if not isinstance(args['process_rule']['rules']['segmentation']['separator'], str): raise ValueError("Process rule segmentation 
separator is invalid") if 'max_tokens' not in args['process_rule']['rules']['segmentation'] \ - or not args['process_rule']['rules']['segmentation']['max_tokens']: + or not args['process_rule']['rules']['segmentation']['max_tokens']: raise ValueError("Process rule segmentation max_tokens is required") if not isinstance(args['process_rule']['rules']['segmentation']['max_tokens'], int): @@ -1178,7 +1219,7 @@ def estimate_args_validate(cls, args: dict): raise ValueError("Process rule rules is invalid") if 'pre_processing_rules' not in args['process_rule']['rules'] \ - or args['process_rule']['rules']['pre_processing_rules'] is None: + or args['process_rule']['rules']['pre_processing_rules'] is None: raise ValueError("Process rule pre_processing_rules is required") if not isinstance(args['process_rule']['rules']['pre_processing_rules'], list): @@ -1203,21 +1244,21 @@ def estimate_args_validate(cls, args: dict): args['process_rule']['rules']['pre_processing_rules'] = list(unique_pre_processing_rule_dicts.values()) if 'segmentation' not in args['process_rule']['rules'] \ - or args['process_rule']['rules']['segmentation'] is None: + or args['process_rule']['rules']['segmentation'] is None: raise ValueError("Process rule segmentation is required") if not isinstance(args['process_rule']['rules']['segmentation'], dict): raise ValueError("Process rule segmentation is invalid") if 'separator' not in args['process_rule']['rules']['segmentation'] \ - or not args['process_rule']['rules']['segmentation']['separator']: + or not args['process_rule']['rules']['segmentation']['separator']: raise ValueError("Process rule segmentation separator is required") if not isinstance(args['process_rule']['rules']['segmentation']['separator'], str): raise ValueError("Process rule segmentation separator is invalid") if 'max_tokens' not in args['process_rule']['rules']['segmentation'] \ - or not args['process_rule']['rules']['segmentation']['max_tokens']: + or not args['process_rule']['rules']['segmentation']['max_tokens']: raise ValueError("Process rule segmentation max_tokens is required") if not isinstance(args['process_rule']['rules']['segmentation']['max_tokens'], int): @@ -1471,12 +1512,16 @@ def delete_segment(cls, segment: DocumentSegment, document: Document, dataset: D class DatasetCollectionBindingService: @classmethod - def get_dataset_collection_binding(cls, provider_name: str, model_name: str, - collection_type: str = 'dataset') -> DatasetCollectionBinding: + def get_dataset_collection_binding( + cls, provider_name: str, model_name: str, + collection_type: str = 'dataset' + ) -> DatasetCollectionBinding: dataset_collection_binding = db.session.query(DatasetCollectionBinding). \ - filter(DatasetCollectionBinding.provider_name == provider_name, - DatasetCollectionBinding.model_name == model_name, - DatasetCollectionBinding.type == collection_type). \ + filter( + DatasetCollectionBinding.provider_name == provider_name, + DatasetCollectionBinding.model_name == model_name, + DatasetCollectionBinding.type == collection_type + ). \ order_by(DatasetCollectionBinding.created_at). 
\ first() @@ -1492,11 +1537,15 @@ def get_dataset_collection_binding(cls, provider_name: str, model_name: str, return dataset_collection_binding @classmethod - def get_dataset_collection_binding_by_id_and_type(cls, collection_binding_id: str, - collection_type: str = 'dataset') -> DatasetCollectionBinding: + def get_dataset_collection_binding_by_id_and_type( + cls, collection_binding_id: str, + collection_type: str = 'dataset' + ) -> DatasetCollectionBinding: dataset_collection_binding = db.session.query(DatasetCollectionBinding). \ - filter(DatasetCollectionBinding.id == collection_binding_id, - DatasetCollectionBinding.type == collection_type). \ + filter( + DatasetCollectionBinding.id == collection_binding_id, + DatasetCollectionBinding.type == collection_type + ). \ order_by(DatasetCollectionBinding.created_at). \ first() From e2352dd807d6a72e6709d7f6abfb788ad32a0c5c Mon Sep 17 00:00:00 2001 From: Joe <1264204425@qq.com> Date: Fri, 28 Jun 2024 17:57:21 +0800 Subject: [PATCH 03/25] chore: remove print --- api/services/dataset_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index ca494c4dcff2fd..a6556549e11118 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -94,7 +94,6 @@ def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, s max_per_page=100, error_out=False ) - print("================第一次", datasets.items, datasets.total) # check datasets permission, assuming this function exists if user and user.current_role != TenantAccountRole.DATASET_OPERATOR: datasets.items, datasets.total = DatasetService.filter_datasets_by_permission( From d30c13891b37608003e920b7ce07a3fbfa1f35cc Mon Sep 17 00:00:00 2001 From: takatost Date: Fri, 28 Jun 2024 20:20:23 +0800 Subject: [PATCH 04/25] feat: add fix-app-site-missing command (#5711) --- api/commands.py | 42 ++++++++++++++++++++++++++++++++++++++++++ api/models/account.py | 3 +-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/api/commands.py b/api/commands.py index 91d77370236322..fa6221df52af18 100644 --- a/api/commands.py +++ b/api/commands.py @@ -12,6 +12,7 @@ from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_type import VectorType from core.rag.models.document import Document +from events.app_event import app_was_created from extensions.ext_database import db from extensions.ext_redis import redis_client from libs.helper import email as email_validate @@ -585,6 +586,46 @@ def upgrade_db(): click.echo('Database migration skipped') +@click.command('fix-app-site-missing', help='Fix app related site missing issue.') +def fix_app_site_missing(): + """ + Fix app related site missing issue. 
+ """ + click.echo(click.style('Start fix app related site missing issue.', fg='green')) + + while True: + try: + sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id +where sites.id is null limit 1000""" + with db.engine.begin() as conn: + rs = conn.execute(db.text(sql)) + + processed_count = 0 + for i in rs: + processed_count += 1 + app_id = str(i.id) + app = db.session.query(App).filter(App.id == app_id).first() + tenant = app.tenant + if tenant: + accounts = tenant.get_accounts() + if not accounts: + print("Fix app {} failed.".format(app.id)) + continue + + account = accounts[0] + print("Fix app {} related site missing issue.".format(app.id)) + app_was_created.send(app, account=account) + + if not processed_count: + break + except Exception as e: + click.echo(click.style('Fix app related site missing issue failed!', fg='red')) + logging.exception(f'Fix app related site missing issue failed, error: {e}') + continue + + click.echo(click.style('Congratulations! Fix app related site missing issue successful!', fg='green')) + + def register_commands(app): app.cli.add_command(reset_password) app.cli.add_command(reset_email) @@ -594,3 +635,4 @@ def register_commands(app): app.cli.add_command(add_qdrant_doc_id_index) app.cli.add_command(create_tenant) app.cli.add_command(upgrade_db) + app.cli.add_command(fix_app_site_missing) diff --git a/api/models/account.py b/api/models/account.py index 4911757b0759c3..3b258c4c82fe8f 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -153,8 +153,7 @@ class Tenant(db.Model): created_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)')) updated_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)')) - def get_accounts(self) -> list[db.Model]: - Account = db.Model + def get_accounts(self) -> list[Account]: return db.session.query(Account).filter( Account.id == TenantAccountJoin.account_id, TenantAccountJoin.tenant_id == self.id From 8e5569f7732849f78131e0528f9f038d6a794621 Mon Sep 17 00:00:00 2001 From: takatost Date: Fri, 28 Jun 2024 20:33:53 +0800 Subject: [PATCH 05/25] fix: fix-app-site-missing command (#5714) --- api/commands.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/api/commands.py b/api/commands.py index fa6221df52af18..56217c898e22e2 100644 --- a/api/commands.py +++ b/api/commands.py @@ -593,17 +593,22 @@ def fix_app_site_missing(): """ click.echo(click.style('Start fix app related site missing issue.', fg='green')) + failed_app_ids = [] while True: - try: - sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id + sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id where sites.id is null limit 1000""" - with db.engine.begin() as conn: - rs = conn.execute(db.text(sql)) + with db.engine.begin() as conn: + rs = conn.execute(db.text(sql)) + + processed_count = 0 + for i in rs: + processed_count += 1 + app_id = str(i.id) + + if app_id in failed_app_ids: + continue - processed_count = 0 - for i in rs: - processed_count += 1 - app_id = str(i.id) + try: app = db.session.query(App).filter(App.id == app_id).first() tenant = app.tenant if tenant: @@ -615,13 +620,15 @@ def fix_app_site_missing(): account = accounts[0] print("Fix app {} related site missing issue.".format(app.id)) app_was_created.send(app, account=account) + except Exception as e: + failed_app_ids.append(app_id) + click.echo(click.style('Fix app {} related site missing 
issue failed!'.format(app_id), fg='red')) + logging.exception(f'Fix app related site missing issue failed, error: {e}') + continue + + if not processed_count: + break - if not processed_count: - break - except Exception as e: - click.echo(click.style('Fix app related site missing issue failed!', fg='red')) - logging.exception(f'Fix app related site missing issue failed, error: {e}') - continue click.echo(click.style('Congratulations! Fix app related site missing issue successful!', fg='green')) From 0bf481747453053070b185272586a7c99b3a4767 Mon Sep 17 00:00:00 2001 From: takatost Date: Fri, 28 Jun 2024 21:00:00 +0800 Subject: [PATCH 06/25] fix: _convert_prompt_message_to_dict parameters err (#5716) --- api/core/app/task_pipeline/workflow_cycle_manage.py | 13 +++++++------ .../model_providers/moonshot/llm/llm.py | 2 +- .../model_runtime/model_providers/nvidia/llm/llm.py | 2 +- .../openai_api_compatible/llm/llm.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py index e79ac05a752e4e..513fc692ffbb85 100644 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ b/api/core/app/task_pipeline/workflow_cycle_manage.py @@ -167,13 +167,14 @@ def _workflow_run_failed( db.session.refresh(workflow_run) db.session.close() - trace_manager.add_trace_task( - TraceTask( - TraceTaskName.WORKFLOW_TRACE, - workflow_run=workflow_run, - conversation_id=conversation_id, + if trace_manager: + trace_manager.add_trace_task( + TraceTask( + TraceTaskName.WORKFLOW_TRACE, + workflow_run=workflow_run, + conversation_id=conversation_id, + ) ) - ) return workflow_run diff --git a/api/core/model_runtime/model_providers/moonshot/llm/llm.py b/api/core/model_runtime/model_providers/moonshot/llm/llm.py index ef301b0f6c3897..17cf65dc3adf70 100644 --- a/api/core/model_runtime/model_providers/moonshot/llm/llm.py +++ b/api/core/model_runtime/model_providers/moonshot/llm/llm.py @@ -93,7 +93,7 @@ def _add_function_call(self, model: str, credentials: dict) -> None: }.intersection(model_schema.features or []): credentials['function_calling_type'] = 'tool_call' - def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: + def _convert_prompt_message_to_dict(self, message: PromptMessage, credentials: Optional[dict] = None) -> dict: """ Convert PromptMessage to dict for OpenAI API format """ diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py index 4b2dbf3d3a5242..11252b92115df7 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py +++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py @@ -200,7 +200,7 @@ def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptM endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions') elif 'server_url' in credentials: endpoint_url = server_url - data['messages'] = [self._convert_prompt_message_to_dict(m) for m in prompt_messages] + data['messages'] = [self._convert_prompt_message_to_dict(m, credentials) for m in prompt_messages] elif completion_type is LLMMode.COMPLETION: data['prompt'] = 'ping' if 'endpoint_url' in credentials: diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py index 36eae2042d1cc0..b76f460737ba6a 100644 --- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py +++ 
b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py @@ -582,7 +582,7 @@ def _handle_generate_response(self, model: str, credentials: dict, response: req return result - def _convert_prompt_message_to_dict(self, message: PromptMessage, credentials: dict = None) -> dict: + def _convert_prompt_message_to_dict(self, message: PromptMessage, credentials: Optional[dict] = None) -> dict: """ Convert PromptMessage to dict for OpenAI API format """ From 2996358cf2cc30e26c1807e1c0975830973ad801 Mon Sep 17 00:00:00 2001 From: William Espegren <131612909+WilliamEspegren@users.noreply.github.com> Date: Fri, 28 Jun 2024 15:14:18 +0200 Subject: [PATCH 07/25] Ignore new middleware.env docker file (#5715) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5763fa74f6c660..0c7e5c712f008b 100644 --- a/.gitignore +++ b/.gitignore @@ -165,6 +165,7 @@ docker/volumes/milvus/* docker/volumes/chroma/* docker/nginx/conf.d/default.conf +docker/middleware.env sdks/python-client/build sdks/python-client/dist From 6d0cea5fe60e1b2290f7af724c50ce8297f31d7f Mon Sep 17 00:00:00 2001 From: takatost Date: Fri, 28 Jun 2024 22:00:19 +0800 Subject: [PATCH 08/25] bump to 0.6.12 (#5712) --- api/configs/packaging/__init__.py | 2 +- docker-legacy/docker-compose.yaml | 6 +++--- docker/docker-compose.yaml | 6 +++--- web/package.json | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py index 95ccb850ed3b40..1f036789afae5f 100644 --- a/api/configs/packaging/__init__.py +++ b/api/configs/packaging/__init__.py @@ -8,7 +8,7 @@ class PackagingInfo(BaseModel): CURRENT_VERSION: str = Field( description='Dify version', - default='0.6.11', + default='0.6.12', ) COMMIT_SHA: str = Field( diff --git a/docker-legacy/docker-compose.yaml b/docker-legacy/docker-compose.yaml index f157a17a53d108..1bb86bc222f1ae 100644 --- a/docker-legacy/docker-compose.yaml +++ b/docker-legacy/docker-compose.yaml @@ -2,7 +2,7 @@ version: '3' services: # API service api: - image: langgenius/dify-api:0.6.11 + image: langgenius/dify-api:0.6.12 restart: always environment: # Startup mode, 'api' starts the API server. @@ -222,7 +222,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.6.11 + image: langgenius/dify-api:0.6.12 restart: always environment: CONSOLE_WEB_URL: '' @@ -388,7 +388,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:0.6.11 + image: langgenius/dify-web:0.6.12 restart: always environment: # The base URL of console application api server, refers to the Console base URL of WEB service if console domain is diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index a7939dae11e92c..91a597f8d1e2f7 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -259,7 +259,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:0.6.11 + image: langgenius/dify-api:0.6.12 restart: always environment: # Use the shared environment variables. @@ -282,7 +282,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.6.11 + image: langgenius/dify-api:0.6.12 restart: always environment: # Use the shared environment variables. @@ -301,7 +301,7 @@ services: # Frontend web application. 
web: - image: langgenius/dify-web:0.6.11 + image: langgenius/dify-web:0.6.12 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} diff --git a/web/package.json b/web/package.json index 46ca4ba059d3d7..f61c6c9506ca79 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "dify-web", - "version": "0.6.11", + "version": "0.6.12", "private": true, "scripts": { "dev": "next dev", From d435230059237a144a74b66326f26b427572acda Mon Sep 17 00:00:00 2001 From: Chenhe Gu Date: Sat, 29 Jun 2024 00:29:44 +0800 Subject: [PATCH 09/25] add README for new docker/ directory (#5724) --- docker/README | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 docker/README diff --git a/docker/README b/docker/README new file mode 100644 index 00000000000000..83a1c70f9d0816 --- /dev/null +++ b/docker/README @@ -0,0 +1,41 @@ +## README for docker Deployment + +Welcome to the new `docker` directory for deploying Dify using Docker Compose. This README outlines the updates, deployment instructions, and migration details for existing users. + +### What's Updated +- **Persistent Environment Variables**: Environment variables are now managed through a `.env` file, ensuring that your configurations persist across deployments. +- **Unified Vector Database Services**: All vector database services are now managed from a single Docker Compose file `docker-compose.yaml`. You can switch between different vector databases by setting the `VECTOR_STORE` environment variable in your `.env` file. +- **Mandatory .env File**: A `.env` file is now required to run `docker compose up`. This file is crucial for configuring your deployment and for any custom settings to persist through upgrades. +- **Legacy Support**: Previous deployment files are now located in the `docker-legacy` directory and will no longer be maintained. + +### How to Deploy Dify with `docker-compose.yaml` +1. **Prerequisites**: Ensure Docker and Docker Compose are installed on your system. +2. **Environment Setup**: + - Navigate to the `docker` directory. + - Copy the `.env.example` file to a new file named `.env` by running `cp .env.example .env`. + - Customize the `.env` file as needed. Refer to the `.env.example` file for detailed configuration options. +3. **Running the Services**: + - Execute `docker compose up` from the `docker` directory to start the services. + - To specify a vector database, set the `VECTOR_store` variable in your `.env` file to your desired vector database service, such as `milvus`, `weaviate`, or `opensearch`. + +### How to Deploy Middleware for Developing Dify +1. **Middleware Setup**: + - Use the `docker-compose.middleware.yaml` for setting up essential middleware services like databases and caches. + - Navigate to the `docker` directory. + - Ensure the `middleware.env` file is created by running `cp middleware.env.example middleware.env` (refer to the `middleware.env.example` file). +2. **Running Middleware Services**: + - Execute `docker-compose -f docker-compose.middleware.yaml up -d` to start the middleware services. + +### Migration for Existing Users +For users migrating from the `docker-legacy` setup: +1. **Review Changes**: Familiarize yourself with the new `.env` configuration and Docker Compose setup. +2. **Transfer Customizations**: + - If you have customized configurations such as `docker-compose.yaml`, `ssrf_proxy/squid.conf`, or `nginx/conf.d/default.conf`, you will need to reflect these changes in the `.env` file you create. +3. 
**Data Migration**: + - Ensure that data from services like databases and caches is backed up and migrated appropriately to the new structure if necessary. + +### Additional Information +- **Continuous Improvement Phase**: We are actively seeking feedback from the community to refine and enhance the deployment process. As more users adopt this new method, we will continue to make improvements based on your experiences and suggestions. +- **Support**: For detailed configuration options and environment variable settings, refer to the `.env.example` file and the Docker Compose configuration files in the `docker` directory. + +This README aims to guide you through the deployment process using the new Docker Compose setup. For any issues or further assistance, please refer to the official documentation or contact support. \ No newline at end of file From f33ef92f0c3e635667f89c09def836d796d37937 Mon Sep 17 00:00:00 2001 From: Chenhe Gu Date: Sat, 29 Jun 2024 00:48:34 +0800 Subject: [PATCH 10/25] Chore/set entrypoint scripts permissions (#5726) --- docker/docker-compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 91a597f8d1e2f7..e0178ff49a6d44 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -375,7 +375,7 @@ services: volumes: - ./ssrf_proxy/squid.conf.template:/etc/squid/squid.conf.template - ./ssrf_proxy/docker-entrypoint.sh:/docker-entrypoint.sh - entrypoint: /docker-entrypoint.sh + entrypoint: ["sh", "-c", "chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh"] environment: # pls clearly modify the squid env vars to fit your network environment. HTTP_PORT: ${HTTP_PORT:-3128} @@ -398,7 +398,7 @@ services: - ./nginx/conf.d:/etc/nginx/conf.d - ./nginx/docker-entrypoint.sh:/docker-entrypoint.sh - ./nginx/ssl:/etc/ssl - entrypoint: /docker-entrypoint.sh + entrypoint: ["sh", "-c", "chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh"] environment: NGINX_SERVER_NAME: ${NGINX_SERVER_NAME:-_} HTTPS_ENABLED: ${HTTPS_ENABLED:-false} From a6356be3488b395e6c227ae0db79d0705904b710 Mon Sep 17 00:00:00 2001 From: Chenhe Gu Date: Sat, 29 Jun 2024 00:53:14 +0800 Subject: [PATCH 11/25] Rename README to README.md (#5727) --- docker/{README => README.md} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename docker/{README => README.md} (99%) diff --git a/docker/README b/docker/README.md similarity index 99% rename from docker/README rename to docker/README.md index 83a1c70f9d0816..76605167cb3cfa 100644 --- a/docker/README +++ b/docker/README.md @@ -38,4 +38,4 @@ For users migrating from the `docker-legacy` setup: - **Continuous Improvement Phase**: We are actively seeking feedback from the community to refine and enhance the deployment process. As more users adopt this new method, we will continue to make improvements based on your experiences and suggestions. - **Support**: For detailed configuration options and environment variable settings, refer to the `.env.example` file and the Docker Compose configuration files in the `docker` directory. -This README aims to guide you through the deployment process using the new Docker Compose setup. For any issues or further assistance, please refer to the official documentation or contact support. \ No newline at end of file +This README aims to guide you through the deployment process using the new Docker Compose setup. For any issues or further assistance, please refer to the official documentation or contact support. 
From 9513155fa46bc9bd1ed06381c163a831aea57d42 Mon Sep 17 00:00:00 2001 From: Nam Vu Date: Sat, 29 Jun 2024 10:24:25 +0700 Subject: [PATCH 12/25] chore: support both $$ and $ latex format (#5723) --- web/app/components/base/markdown.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/app/components/base/markdown.tsx b/web/app/components/base/markdown.tsx index 37d9ad691d2817..14a630b2ed74a7 100644 --- a/web/app/components/base/markdown.tsx +++ b/web/app/components/base/markdown.tsx @@ -44,7 +44,8 @@ const preprocessLaTeX = (content: string) => { if (typeof content !== 'string') return content return content.replace(/\\\[(.*?)\\\]/gs, (_, equation) => `$$${equation}$$`) - .replace(/\\\((.*?)\\\)/gs, (_, equation) => `$${equation}$`) + .replace(/\\\((.*?)\\\)/gs, (_, equation) => `$$${equation}$$`) + .replace(/(^|[^\\])\$(.+?)\$/gs, (_, prefix, equation) => `${prefix}$${equation}$`) } export function PreCode(props: { children: any }) { From 906857b28ae1470550406a24d9611de394f82606 Mon Sep 17 00:00:00 2001 From: takatost Date: Sat, 29 Jun 2024 17:07:21 +0800 Subject: [PATCH 13/25] fix: couldn't log in or resetup after a failed setup (#5739) --- api/controllers/console/setup.py | 22 +++--------- api/services/account_service.py | 58 ++++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/api/controllers/console/setup.py b/api/controllers/console/setup.py index a8fdde2791c426..def50212a18b82 100644 --- a/api/controllers/console/setup.py +++ b/api/controllers/console/setup.py @@ -3,11 +3,10 @@ from flask import current_app, request from flask_restful import Resource, reqparse -from extensions.ext_database import db from libs.helper import email, get_remote_ip, str_len from libs.password import valid_password from models.model import DifySetup -from services.account_service import AccountService, RegisterService, TenantService +from services.account_service import RegisterService, TenantService from . 
import api from .error import AlreadySetupError, NotInitValidateError, NotSetupError @@ -51,28 +50,17 @@ def post(self): required=True, location='json') args = parser.parse_args() - # Register - account = RegisterService.register( + # setup + RegisterService.setup( email=args['email'], name=args['name'], - password=args['password'] + password=args['password'], + ip_address=get_remote_ip(request) ) - TenantService.create_owner_tenant_if_not_exist(account) - - setup() - AccountService.update_last_login(account, ip_address=get_remote_ip(request)) - return {'result': 'success'}, 201 -def setup(): - dify_setup = DifySetup( - version=current_app.config['CURRENT_VERSION'] - ) - db.session.add(dify_setup) - - def setup_required(view): @wraps(view) def decorated(*args, **kwargs): diff --git a/api/services/account_service.py b/api/services/account_service.py index 2c401aad911791..5671da6d620e12 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -17,6 +17,7 @@ from libs.password import compare_password, hash_password, valid_password from libs.rsa import generate_key_pair from models.account import * +from models.model import DifySetup from services.errors.account import ( AccountAlreadyInTenantError, AccountLoginError, @@ -119,10 +120,11 @@ def update_account_password(account, password, new_password): return account @staticmethod - def create_account(email: str, name: str, interface_language: str, - password: str = None, - interface_theme: str = 'light', - timezone: str = 'America/New_York', ) -> Account: + def create_account(email: str, + name: str, + interface_language: str, + password: Optional[str] = None, + interface_theme: str = 'light') -> Account: """create account""" account = Account() account.email = email @@ -200,7 +202,6 @@ def update_last_login(account: Account, *, ip_address: str) -> None: account.last_login_ip = ip_address db.session.add(account) db.session.commit() - logging.info(f'Account {account.id} logged in successfully.') @staticmethod def login(account: Account, *, ip_address: Optional[str] = None): @@ -444,8 +445,51 @@ def _get_invitation_token_key(cls, token: str) -> str: return f'member_invite:token:{token}' @classmethod - def register(cls, email, name, password: str = None, open_id: str = None, provider: str = None, - language: str = None, status: AccountStatus = None) -> Account: + def setup(cls, email: str, name: str, password: str, ip_address: str) -> None: + """ + Setup dify + + :param email: email + :param name: username + :param password: password + :param ip_address: ip address + """ + try: + # Register + account = AccountService.create_account( + email=email, + name=name, + interface_language=languages[0], + password=password, + ) + + account.last_login_ip = ip_address + account.initialized_at = datetime.now(timezone.utc).replace(tzinfo=None) + + TenantService.create_owner_tenant_if_not_exist(account) + + dify_setup = DifySetup( + version=current_app.config['CURRENT_VERSION'] + ) + db.session.add(dify_setup) + db.session.commit() + except Exception as e: + db.session.query(DifySetup).delete() + db.session.query(TenantAccountJoin).delete() + db.session.query(Account).delete() + db.session.query(Tenant).delete() + db.session.commit() + + logging.exception(f'Setup failed: {e}') + raise ValueError(f'Setup failed: {e}') + + @classmethod + def register(cls, email, name, + password: Optional[str] = None, + open_id: Optional[str] = None, + provider: Optional[str] = None, + language: Optional[str] = None, + status: Optional[AccountStatus] 
= None) -> Account: db.session.begin_nested() """Register account""" try: From d56cedfc67dda04ddec885d87e93d5cc3f0a4770 Mon Sep 17 00:00:00 2001 From: takatost Date: Sat, 29 Jun 2024 17:15:25 +0800 Subject: [PATCH 14/25] fix: app config does not use empty string in the env (#5741) --- api/configs/app_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/api/configs/app_config.py b/api/configs/app_config.py index f3bab64fb2d0be..4467b84c8666e6 100644 --- a/api/configs/app_config.py +++ b/api/configs/app_config.py @@ -36,7 +36,6 @@ class DifyConfig( # read from dotenv format config file env_file='.env', env_file_encoding='utf-8', - env_ignore_empty=True, # ignore extra attributes extra='ignore', From 0b8faade6fc530370cf0bbdb337ad60ebf78672c Mon Sep 17 00:00:00 2001 From: takatost Date: Sat, 29 Jun 2024 17:34:12 +0800 Subject: [PATCH 15/25] fix: env SMTP_PORT is empty caused err when launching (#5742) --- docker/.env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/.env.example b/docker/.env.example index 48288c556f85b4..7ff82530fa1589 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -405,7 +405,7 @@ RESEND_API_KEY=your-resend-api-key # SMTP server configuration, used when MAIL_TYPE is `smtp` SMTP_SERVER= -SMTP_PORT= +SMTP_PORT=465 SMTP_USERNAME= SMTP_PASSWORD= SMTP_USE_TLS=true From 8fd75e6965a33930967a44a03a1b8640095d6f4a Mon Sep 17 00:00:00 2001 From: takatost Date: Sat, 29 Jun 2024 17:43:20 +0800 Subject: [PATCH 16/25] bump to 0.6.12-fix1 (#5743) --- api/configs/packaging/__init__.py | 2 +- docker-legacy/docker-compose.yaml | 6 +++--- docker/docker-compose.yaml | 6 +++--- web/package.json | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py index 1f036789afae5f..e9b389df8d3f97 100644 --- a/api/configs/packaging/__init__.py +++ b/api/configs/packaging/__init__.py @@ -8,7 +8,7 @@ class PackagingInfo(BaseModel): CURRENT_VERSION: str = Field( description='Dify version', - default='0.6.12', + default='0.6.12-fix1', ) COMMIT_SHA: str = Field( diff --git a/docker-legacy/docker-compose.yaml b/docker-legacy/docker-compose.yaml index 1bb86bc222f1ae..eadaaced2c583d 100644 --- a/docker-legacy/docker-compose.yaml +++ b/docker-legacy/docker-compose.yaml @@ -2,7 +2,7 @@ version: '3' services: # API service api: - image: langgenius/dify-api:0.6.12 + image: langgenius/dify-api:0.6.12-fix1 restart: always environment: # Startup mode, 'api' starts the API server. @@ -222,7 +222,7 @@ services: # worker service # The Celery worker for processing the queue. worker: - image: langgenius/dify-api:0.6.12 + image: langgenius/dify-api:0.6.12-fix1 restart: always environment: CONSOLE_WEB_URL: '' @@ -388,7 +388,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:0.6.12 + image: langgenius/dify-web:0.6.12-fix1 restart: always environment: # The base URL of console application api server, refers to the Console base URL of WEB service if console domain is diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index e0178ff49a6d44..9d7fa17f8def2f 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -259,7 +259,7 @@ x-shared-env: &shared-api-worker-env services: # API service api: - image: langgenius/dify-api:0.6.12 + image: langgenius/dify-api:0.6.12-fix1 restart: always environment: # Use the shared environment variables. @@ -282,7 +282,7 @@ services: # worker service # The Celery worker for processing the queue. 
worker: - image: langgenius/dify-api:0.6.12 + image: langgenius/dify-api:0.6.12-fix1 restart: always environment: # Use the shared environment variables. @@ -301,7 +301,7 @@ services: # Frontend web application. web: - image: langgenius/dify-web:0.6.12 + image: langgenius/dify-web:0.6.12-fix1 restart: always environment: CONSOLE_API_URL: ${CONSOLE_API_URL:-} diff --git a/web/package.json b/web/package.json index f61c6c9506ca79..71819c176c1b11 100644 --- a/web/package.json +++ b/web/package.json @@ -1,6 +1,6 @@ { "name": "dify-web", - "version": "0.6.12", + "version": "0.6.12-fix1", "private": true, "scripts": { "dev": "next dev", From cdf64d4ee270f5bef7d66073e766601a35168330 Mon Sep 17 00:00:00 2001 From: takatost Date: Sat, 29 Jun 2024 18:35:32 +0800 Subject: [PATCH 17/25] Update docker-compose.yaml (#5745) --- docker/docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 9d7fa17f8def2f..2d25f93392980c 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -232,7 +232,7 @@ x-shared-env: &shared-api-worker-env # default send from email address, if not specified MAIL_DEFAULT_SEND_FROM: ${MAIL_DEFAULT_SEND_FROM} SMTP_SERVER: ${SMTP_SERVER} - SMTP_PORT: ${SMTP_PORT} + SMTP_PORT: ${SMTP_PORT:-465} SMTP_USERNAME: ${SMTP_USERNAME} SMTP_PASSWORD: ${SMTP_PASSWORD} SMTP_USE_TLS: ${SMTP_USE_TLS} From 1e045a0187d2e637c6156a78c12778b842b89ab1 Mon Sep 17 00:00:00 2001 From: takatost Date: Sat, 29 Jun 2024 20:28:30 +0800 Subject: [PATCH 18/25] fix: slow sql of ops tracing (#5749) --- .github/workflows/db-migration-test.yml | 5 +++ api/core/ops/ops_trace_manager.py | 11 +++++-- ...6_add_workflow_run_id_index_for_message.py | 32 +++++++++++++++++++ api/models/model.py | 1 + 4 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 api/migrations/versions/b2602e131636_add_workflow_run_id_index_for_message.py diff --git a/.github/workflows/db-migration-test.yml b/.github/workflows/db-migration-test.yml index 64e8eb291c466e..67d1558dbcaaff 100644 --- a/.github/workflows/db-migration-test.yml +++ b/.github/workflows/db-migration-test.yml @@ -38,6 +38,11 @@ jobs: - name: Install dependencies run: poetry install -C api + - name: Prepare middleware env + run: | + cd docker + cp middleware.env.example middleware.env + - name: Set up Middlewares uses: hoverkraft-tech/compose-action@v2.0.0 with: diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index fbdc12331e3a7b..ff15aa999b4b46 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -352,10 +352,17 @@ def workflow_trace(self, workflow_run: WorkflowRun, conversation_id): query = workflow_run_inputs.get("query") or workflow_run_inputs.get("sys.query") or "" # get workflow_app_log_id - workflow_app_log_data = db.session.query(WorkflowAppLog).filter_by(workflow_run_id=workflow_run.id).first() + workflow_app_log_data = db.session.query(WorkflowAppLog).filter_by( + tenant_id=tenant_id, + app_id=workflow_run.app_id, + workflow_run_id=workflow_run.id + ).first() workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None # get message_id - message_data = db.session.query(Message.id).filter_by(workflow_run_id=workflow_run_id).first() + message_data = db.session.query(Message.id).filter_by( + conversation_id=conversation_id, + workflow_run_id=workflow_run_id + ).first() message_id = str(message_data.id) if message_data else None metadata = { diff --git 
a/api/migrations/versions/b2602e131636_add_workflow_run_id_index_for_message.py b/api/migrations/versions/b2602e131636_add_workflow_run_id_index_for_message.py new file mode 100644 index 00000000000000..c9a6a5a5a7d90f --- /dev/null +++ b/api/migrations/versions/b2602e131636_add_workflow_run_id_index_for_message.py @@ -0,0 +1,32 @@ +"""add workflow_run_id index for message + +Revision ID: b2602e131636 +Revises: 63f9175e515b +Create Date: 2024-06-29 12:16:51.646346 + +""" +from alembic import op + +import models as models + +# revision identifiers, used by Alembic. +revision = 'b2602e131636' +down_revision = '63f9175e515b' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.create_index('message_workflow_run_id_idx', ['conversation_id', 'workflow_run_id'], unique=False) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('messages', schema=None) as batch_op: + batch_op.drop_index('message_workflow_run_id_idx') + + # ### end Alembic commands ### diff --git a/api/models/model.py b/api/models/model.py index 07d7f6d8917f8e..f59e8ebb7c0f58 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -626,6 +626,7 @@ class Message(db.Model): db.Index('message_conversation_id_idx', 'conversation_id'), db.Index('message_end_user_idx', 'app_id', 'from_source', 'from_end_user_id'), db.Index('message_account_idx', 'app_id', 'from_source', 'from_account_id'), + db.Index('message_workflow_run_id_idx', 'conversation_id', 'workflow_run_id') ) id = db.Column(StringUUID, server_default=db.text('uuid_generate_v4()')) From fc0f75d13b92e8c04f44403221052ff9d8100d97 Mon Sep 17 00:00:00 2001 From: Chenhe Gu Date: Sat, 29 Jun 2024 22:09:59 +0800 Subject: [PATCH 19/25] Docs/add docker dotenv notes (#5750) --- docker/README.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/docker/README.md b/docker/README.md index 76605167cb3cfa..6bff8bc3142ccc 100644 --- a/docker/README.md +++ b/docker/README.md @@ -4,6 +4,10 @@ Welcome to the new `docker` directory for deploying Dify using Docker Compose. T ### What's Updated - **Persistent Environment Variables**: Environment variables are now managed through a `.env` file, ensuring that your configurations persist across deployments. + + > What is `.env`?
+ > The `.env` file is a crucial component in Docker and Docker Compose environments, serving as a centralized configuration file where you can define environment variables that are accessible to the containers at runtime. This file simplifies the management of environment settings across different stages of development, testing, and production, providing consistency and ease of configuration to deployments. + - **Unified Vector Database Services**: All vector database services are now managed from a single Docker Compose file `docker-compose.yaml`. You can switch between different vector databases by setting the `VECTOR_STORE` environment variable in your `.env` file. - **Mandatory .env File**: A `.env` file is now required to run `docker compose up`. This file is crucial for configuring your deployment and for any custom settings to persist through upgrades. - **Legacy Support**: Previous deployment files are now located in the `docker-legacy` directory and will no longer be maintained. @@ -34,8 +38,51 @@ For users migrating from the `docker-legacy` setup: 3. **Data Migration**: - Ensure that data from services like databases and caches is backed up and migrated appropriately to the new structure if necessary. +### Overview of `.env` + +#### Key Modules and Customization + +- **Vector Database Services**: Depending on the type of vector database used (`VECTOR_STORE`), users can set specific endpoints, ports, and authentication details. +- **Storage Services**: Depending on the storage type (`STORAGE_TYPE`), users can configure specific settings for S3, Azure Blob, Google Storage, etc. +- **API and Web Services**: Users can define URLs and other settings that affect how the API and web frontends operate. + +#### Other notable variables +The `.env.example` file provided in the Docker setup is extensive and covers a wide range of configuration options. It is structured into several sections, each pertaining to different aspects of the application and its services. Here are some of the key sections and variables: + +1. **Common Variables**: + - `CONSOLE_API_URL`, `SERVICE_API_URL`: URLs for different API services. + - `APP_WEB_URL`: Frontend application URL. + - `FILES_URL`: Base URL for file downloads and previews. + +2. **Server Configuration**: + - `LOG_LEVEL`, `DEBUG`, `FLASK_DEBUG`: Logging and debug settings. + - `SECRET_KEY`: A key for encrypting session cookies and other sensitive data. + +3. **Database Configuration**: + - `DB_USERNAME`, `DB_PASSWORD`, `DB_HOST`, `DB_PORT`, `DB_DATABASE`: PostgreSQL database credentials and connection details. + +4. **Redis Configuration**: + - `REDIS_HOST`, `REDIS_PORT`, `REDIS_PASSWORD`: Redis server connection settings. + +5. **Celery Configuration**: + - `CELERY_BROKER_URL`: Configuration for Celery message broker. + +6. **Storage Configuration**: + - `STORAGE_TYPE`, `S3_BUCKET_NAME`, `AZURE_BLOB_ACCOUNT_NAME`: Settings for file storage options like local, S3, Azure Blob, etc. + +7. **Vector Database Configuration**: + - `VECTOR_STORE`: Type of vector database (e.g., `weaviate`, `milvus`). + - Specific settings for each vector store like `WEAVIATE_ENDPOINT`, `MILVUS_HOST`. + +8. **CORS Configuration**: + - `WEB_API_CORS_ALLOW_ORIGINS`, `CONSOLE_CORS_ALLOW_ORIGINS`: Settings for cross-origin resource sharing. + +9. **Other Service-Specific Environment Variables**: + - Each service like `nginx`, `redis`, `db`, and vector databases have specific environment variables that are directly referenced in the `docker-compose.yaml`. 
+ + ### Additional Information - **Continuous Improvement Phase**: We are actively seeking feedback from the community to refine and enhance the deployment process. As more users adopt this new method, we will continue to make improvements based on your experiences and suggestions. - **Support**: For detailed configuration options and environment variable settings, refer to the `.env.example` file and the Docker Compose configuration files in the `docker` directory. -This README aims to guide you through the deployment process using the new Docker Compose setup. For any issues or further assistance, please refer to the official documentation or contact support. +This README aims to guide you through the deployment process using the new Docker Compose setup. For any issues or further assistance, please refer to the official documentation or contact support. \ No newline at end of file From f101fcd0e700ea324c4e915a0ca9a9a6004b5784 Mon Sep 17 00:00:00 2001 From: Yeuoly <45712896+Yeuoly@users.noreply.github.com> Date: Sat, 29 Jun 2024 23:29:43 +0800 Subject: [PATCH 20/25] fix: missing process data in parameter extractor (#5755) --- .../nodes/parameter_extractor/parameter_extractor_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py index ea0cdf96e7d0f3..2fb96679e4283a 100644 --- a/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/core/workflow/nodes/parameter_extractor/parameter_extractor_node.py @@ -131,7 +131,7 @@ def _run(self, variable_pool: VariablePool) -> NodeRunResult: return NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, inputs=inputs, - process_data={}, + process_data=process_data, outputs={ '__is_success': 0, '__reason': str(e) From ffb07eb24b58f8272558f4de4c5f2c3f6072ef49 Mon Sep 17 00:00:00 2001 From: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Date: Sat, 29 Jun 2024 23:32:52 +0800 Subject: [PATCH 21/25] fix: workflow trace none type error (#5758) --- api/core/ops/langfuse_trace/langfuse_trace.py | 4 +++- api/core/ops/langsmith_trace/langsmith_trace.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index 46795c8c3cc4aa..06eb3e2f43838d 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -121,7 +121,9 @@ def workflow_trace(self, trace_info: WorkflowTraceInfo): node_type = node_execution.node_type status = node_execution.status if node_type == "llm": - inputs = json.loads(node_execution.process_data).get("prompts", {}) + inputs = json.loads(node_execution.process_data).get( + "prompts", {} + ) if node_execution.process_data else {} else: inputs = json.loads(node_execution.inputs) if node_execution.inputs else {} outputs = ( diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index 422830fb1e4df4..1dbda601edf979 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -114,7 +114,9 @@ def workflow_trace(self, trace_info: WorkflowTraceInfo): node_type = node_execution.node_type status = node_execution.status if node_type == "llm": - inputs = json.loads(node_execution.process_data).get("prompts", {}) + inputs = json.loads(node_execution.process_data).get( + "prompts", {} + ) 
if node_execution.process_data else {} else: inputs = json.loads(node_execution.inputs) if node_execution.inputs else {} outputs = ( From f637ae4794a09a7d800d4e4d80e825c3e079529f Mon Sep 17 00:00:00 2001 From: Joe <79627742+ZhouhaoJiang@users.noreply.github.com> Date: Sun, 30 Jun 2024 01:12:16 +0800 Subject: [PATCH 22/25] fix: langsmith message_trace end_user_data session_id error (#5759) --- api/core/ops/langfuse_trace/langfuse_trace.py | 4 +++- api/core/ops/langsmith_trace/langsmith_trace.py | 10 ++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index 06eb3e2f43838d..5b5d5def1bd541 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -215,7 +215,9 @@ def message_trace( end_user_data: EndUser = db.session.query(EndUser).filter( EndUser.id == message_data.from_end_user_id ).first() - user_id = end_user_data.session_id + if end_user_data is not None: + user_id = end_user_data.session_id + metadata["user_id"] = user_id trace_data = LangfuseTrace( id=message_id, diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index 1dbda601edf979..0fee076d55a47b 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -183,13 +183,15 @@ def message_trace(self, trace_info: MessageTraceInfo): message_id = message_data.id user_id = message_data.from_account_id + metadata["user_id"] = user_id + if message_data.from_end_user_id: end_user_data: EndUser = db.session.query(EndUser).filter( EndUser.id == message_data.from_end_user_id - ).first().session_id - end_user_id = end_user_data.session_id - metadata["end_user_id"] = end_user_id - metadata["user_id"] = user_id + ).first() + if end_user_data is not None: + end_user_id = end_user_data.session_id + metadata["end_user_id"] = end_user_id message_run = LangSmithRunModel( input_tokens=trace_info.message_tokens, From eab0ac3a13bea3a2f952fa42eccf2a12cd3e2508 Mon Sep 17 00:00:00 2001 From: takatost Date: Sun, 30 Jun 2024 10:31:31 +0800 Subject: [PATCH 23/25] chore: remove port expose in docker compose (#5754) Co-authored-by: Chenhe Gu --- .github/workflows/api-tests.yml | 3 ++ .github/workflows/expose_service_ports.sh | 10 ++++ docker/.env.example | 10 +++- docker/docker-compose.middleware.yaml | 64 ++++++++++++----------- docker/docker-compose.yaml | 49 ++--------------- docker/middleware.env.example | 9 ++++ 6 files changed, 66 insertions(+), 79 deletions(-) create mode 100755 .github/workflows/expose_service_ports.sh diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index e67b33bed51129..e424171019a780 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -60,6 +60,9 @@ jobs: cp docker/.env.example docker/.env cp docker/middleware.env.example docker/middleware.env + - name: Expose Service Ports + run: sh .github/workflows/expose_service_ports.sh + - name: Set up Sandbox uses: hoverkraft-tech/compose-action@v2.0.0 with: diff --git a/.github/workflows/expose_service_ports.sh b/.github/workflows/expose_service_ports.sh new file mode 100755 index 00000000000000..3418bf0c6f6688 --- /dev/null +++ b/.github/workflows/expose_service_ports.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +yq eval '.services.weaviate.ports += ["8080:8080"]' -i docker/docker-compose.yaml +yq eval '.services.qdrant.ports += ["6333:6333"]' -i 
docker/docker-compose.yaml +yq eval '.services.chroma.ports += ["8000:8000"]' -i docker/docker-compose.yaml +yq eval '.services["milvus-standalone"].ports += ["19530:19530"]' -i docker/docker-compose.yaml +yq eval '.services.pgvector.ports += ["5433:5432"]' -i docker/docker-compose.yaml +yq eval '.services["pgvecto-rs"].ports += ["5431:5432"]' -i docker/docker-compose.yaml + +echo "Ports exposed for sandbox, weaviate, qdrant, chroma, milvus, pgvector, pgvecto-rs." \ No newline at end of file diff --git a/docker/.env.example b/docker/.env.example index 7ff82530fa1589..eb4a04351f1356 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -585,7 +585,7 @@ NGINX_PROXY_SEND_TIMEOUT=3600s # ------------------------------ # Environment Variables for SSRF Proxy # ------------------------------ -HTTP_PORT=3128 +SSRF_HTTP_PORT=3128 COREDUMP_DIR=/var/spool/squid REVERSE_PROXY_PORT=8194 SANDBOX_HOST=sandbox @@ -595,4 +595,10 @@ SANDBOX_HOST=sandbox # (based on the vector db type, the corresponding docker # compose profile will be used) # ------------------------------ -COMPOSE_PROFILES=${VECTOR_STORE:-weaviate} \ No newline at end of file +COMPOSE_PROFILES=${VECTOR_STORE:-weaviate} + +# ------------------------------ +# Docker Compose Service Expose Host Port Configurations +# ------------------------------ +EXPOSE_NGINX_PORT=80 +EXPOSE_NGINX_SSL_PORT=443 diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml index d031e01bf7b22b..ec0f5ba5e76137 100644 --- a/docker/docker-compose.middleware.yaml +++ b/docker/docker-compose.middleware.yaml @@ -12,7 +12,7 @@ services: volumes: - ./volumes/db/data:/var/lib/postgresql/data ports: - - "5432:5432" + - "${EXPOSE_POSTGRES_PORT:-5432}:5432" # The redis cache. redis: @@ -24,32 +24,7 @@ services: # Set the redis password when startup redis server. command: redis-server --requirepass difyai123456 ports: - - "6379:6379" - - # The Weaviate vector store. - weaviate: - image: semitechnologies/weaviate:1.19.0 - restart: always - volumes: - # Mount the Weaviate data directory to the container. - - ./volumes/weaviate:/var/lib/weaviate - env_file: - - ./middleware.env - environment: - # The Weaviate configurations - # You can refer to the [Weaviate](https://weaviate.io/developers/weaviate/config-refs/env-vars) documentation for more information. - PERSISTENCE_DATA_PATH: ${PERSISTENCE_DATA_PATH:-'/var/lib/weaviate'} - QUERY_DEFAULTS_LIMIT: ${QUERY_DEFAULTS_LIMIT:-25} - AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: ${AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED:-false} - DEFAULT_VECTORIZER_MODULE: ${DEFAULT_VECTORIZER_MODULE:-none} - CLUSTER_HOSTNAME: ${CLUSTER_HOSTNAME:-node1} - AUTHENTICATION_APIKEY_ENABLED: ${AUTHENTICATION_APIKEY_ENABLED:-true} - AUTHENTICATION_APIKEY_ALLOWED_KEYS: ${AUTHENTICATION_APIKEY_ALLOWED_KEYS:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih} - AUTHENTICATION_APIKEY_USERS: ${AUTHENTICATION_APIKEY_USERS:-hello@dify.ai} - AUTHORIZATION_ADMINLIST_ENABLED: ${AUTHORIZATION_ADMINLIST_ENABLED:-true} - AUTHORIZATION_ADMINLIST_USERS: ${AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} - ports: - - "8080:8080" + - "${EXPOSE_REDIS_PORT:-6379}:6379" # The DifySandbox sandbox: @@ -81,20 +56,47 @@ services: - ./ssrf_proxy/squid.conf.template:/etc/squid/squid.conf.template - ./ssrf_proxy/docker-entrypoint.sh:/docker-entrypoint.sh entrypoint: /docker-entrypoint.sh - ports: - - "3128:3128" - - "8194:8194" environment: # pls clearly modify the squid env vars to fit your network environment. 
- HTTP_PORT: ${HTTP_PORT:-3128} + HTTP_PORT: ${SSRF_HTTP_PORT:-3128} COREDUMP_DIR: ${COREDUMP_DIR:-/var/spool/squid} REVERSE_PROXY_PORT: ${REVERSE_PROXY_PORT:-8194} SANDBOX_HOST: ${SANDBOX_HOST:-sandbox} SANDBOX_PORT: ${SANDBOX_PORT:-8194} + ports: + - "${EXPOSE_SSRF_PROXY_PORT:-3128}:${SSRF_HTTP_PORT:-3128}" + - "${EXPOSE_SANDBOX_PORT:-8194}:${SANDBOX_PORT:-8194}" networks: - ssrf_proxy_network - default + # The Weaviate vector store. + weaviate: + image: semitechnologies/weaviate:1.19.0 + profiles: + - weaviate + restart: always + volumes: + # Mount the Weaviate data directory to the container. + - ./volumes/weaviate:/var/lib/weaviate + env_file: + - ./middleware.env + environment: + # The Weaviate configurations + # You can refer to the [Weaviate](https://weaviate.io/developers/weaviate/config-refs/env-vars) documentation for more information. + PERSISTENCE_DATA_PATH: ${PERSISTENCE_DATA_PATH:-'/var/lib/weaviate'} + QUERY_DEFAULTS_LIMIT: ${QUERY_DEFAULTS_LIMIT:-25} + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: ${AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED:-false} + DEFAULT_VECTORIZER_MODULE: ${DEFAULT_VECTORIZER_MODULE:-none} + CLUSTER_HOSTNAME: ${CLUSTER_HOSTNAME:-node1} + AUTHENTICATION_APIKEY_ENABLED: ${AUTHENTICATION_APIKEY_ENABLED:-true} + AUTHENTICATION_APIKEY_ALLOWED_KEYS: ${AUTHENTICATION_APIKEY_ALLOWED_KEYS:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih} + AUTHENTICATION_APIKEY_USERS: ${AUTHENTICATION_APIKEY_USERS:-hello@dify.ai} + AUTHORIZATION_ADMINLIST_ENABLED: ${AUTHORIZATION_ADMINLIST_ENABLED:-true} + AUTHORIZATION_ADMINLIST_USERS: ${AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} + ports: + - "${EXPOSE_WEAVIATE_PORT:-8080}:8080" + networks: # create a network between sandbox, api and ssrf_proxy, and can not access outside. ssrf_proxy_network: diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 2d25f93392980c..0fbd8e24df26e2 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -272,9 +272,6 @@ services: volumes: # Mount the storage directory to the container, for storing user files. - ./volumes/app/storage:/app/api/storage - # uncomment to expose dify-api port to host - # ports: - # - "5001:5001" networks: - ssrf_proxy_network - default @@ -307,9 +304,6 @@ services: CONSOLE_API_URL: ${CONSOLE_API_URL:-} APP_API_URL: ${APP_API_URL:-} SENTRY_DSN: ${SENTRY_DSN:-} - # uncomment to expose dify-web port to host - # ports: - # - "3000:3000" # The postgres database. db: @@ -322,9 +316,6 @@ services: PGDATA: ${PGDATA:-/var/lib/postgresql/data/pgdata} volumes: - ./volumes/db/data:/var/lib/postgresql/data - # uncomment to expose db(postgresql) port to host - # ports: - # - "5432:5432" healthcheck: test: [ "CMD", "pg_isready" ] interval: 1s @@ -342,9 +333,6 @@ services: command: redis-server --requirepass ${REDIS_PASSWORD:-difyai123456} healthcheck: test: [ "CMD", "redis-cli", "ping" ] - # uncomment to expose redis port to host - # ports: - # - "6379:6379" # The DifySandbox sandbox: @@ -378,7 +366,7 @@ services: entrypoint: ["sh", "-c", "chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh"] environment: # pls clearly modify the squid env vars to fit your network environment. 
- HTTP_PORT: ${HTTP_PORT:-3128} + HTTP_PORT: ${SSRF_HTTP_PORT:-3128} COREDUMP_DIR: ${COREDUMP_DIR:-/var/spool/squid} REVERSE_PROXY_PORT: ${REVERSE_PROXY_PORT:-8194} SANDBOX_HOST: ${SANDBOX_HOST:-sandbox} @@ -417,8 +405,8 @@ services: - api - web ports: - - "${NGINX_PORT:-80}:80" - - "${NGINX_SSL_PORT:-443}:443" + - "${EXPOSE_NGINX_PORT:-80}:80" + - "${EXPOSE_NGINX_SSL_PORT:-443}:443" # The Weaviate vector store. weaviate: @@ -442,10 +430,6 @@ services: AUTHENTICATION_APIKEY_USERS: ${AUTHENTICATION_APIKEY_USERS:-hello@dify.ai} AUTHORIZATION_ADMINLIST_ENABLED: ${AUTHORIZATION_ADMINLIST_ENABLED:-true} AUTHORIZATION_ADMINLIST_USERS: ${AUTHORIZATION_ADMINLIST_USERS:-hello@dify.ai} - # uncomment to expose weaviate port to host - ports: - - "8080:8080" - # Qdrant vector store. # (if used, you need to set VECTOR_STORE to qdrant in the api & worker service.) @@ -458,10 +442,6 @@ services: - ./volumes/qdrant:/qdrant/storage environment: QDRANT_API_KEY: ${QDRANT_API_KEY:-difyai123456} - # uncomment to expose qdrant port to host - ports: - - "6333:6333" - - "6334:6334" # The pgvector vector database. pgvector: @@ -479,9 +459,6 @@ services: PGDATA: ${PGVECTOR_PGDATA:-/var/lib/postgresql/data/pgdata} volumes: - ./volumes/pgvector/data:/var/lib/postgresql/data - # uncomment to expose db(postgresql) port to host - ports: - - "5433:5432" healthcheck: test: [ "CMD", "pg_isready" ] interval: 1s @@ -504,9 +481,6 @@ services: PGDATA: ${PGDATA:-/var/lib/postgresql/data/pgdata} volumes: - ./volumes/pgvecto_rs/data:/var/lib/postgresql/data - # uncomment to expose db(postgresql) port to host - ports: - - "5431:5432" healthcheck: test: [ "CMD", "pg_isready" ] interval: 1s @@ -525,16 +499,12 @@ services: CHROMA_SERVER_AUTHN_CREDENTIALS: ${CHROMA_SERVER_AUTHN_CREDENTIALS:-difyai123456} CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider} IS_PERSISTENT: ${IS_PERSISTENT:-TRUE} - ports: - - "8000:8000" oracle: image: container-registry.oracle.com/database/free:latest profiles: - oracle restart: always - ports: - - 1521:1521 volumes: - type: volume source: oradata @@ -574,9 +544,6 @@ services: environment: MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin} MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin} - ports: - - "9001:9001" - - "9000:9000" volumes: - ./volumes/milvus/minio:/minio_data command: minio server /minio_data --console-address ":9001" @@ -606,9 +573,6 @@ services: start_period: 90s timeout: 20s retries: 3 - ports: - - "19530:19530" - - "9091:9091" depends_on: - "etcd" - "minio" @@ -634,9 +598,6 @@ services: hard: ${OPENSEARCH_NOFILE_HARD:-65536} volumes: - ./volumes/opensearch/data:/usr/share/opensearch/data - ports: - - "9200:9200" - - "9600:9600" networks: - opensearch-net @@ -645,10 +606,6 @@ services: image: opensearchproject/opensearch-dashboards:latest profiles: - opensearch - ports: - - "5601:5601" - expose: - - "5601" environment: OPENSEARCH_HOSTS: '["https://opensearch:9200"]' volumes: diff --git a/docker/middleware.env.example b/docker/middleware.env.example index e17d67f6129402..051a79d54eae98 100644 --- a/docker/middleware.env.example +++ b/docker/middleware.env.example @@ -40,3 +40,12 @@ AUTHENTICATION_APIKEY_ALLOWED_KEYS=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih AUTHENTICATION_APIKEY_USERS=hello@dify.ai AUTHORIZATION_ADMINLIST_ENABLED=true AUTHORIZATION_ADMINLIST_USERS=hello@dify.ai + +# ------------------------------ +# Docker Compose Service Expose Host Port Configurations +# ------------------------------ 
+EXPOSE_POSTGRES_PORT=5432 +EXPOSE_REDIS_PORT=6379 +EXPOSE_SANDBOX_PORT=8194 +EXPOSE_SSRF_PROXY_PORT=3128 +EXPOSE_WEAVIATE_PORT=8080 From 84cd2bd11f17f3cb483a662a1d3a863dfabe8a81 Mon Sep 17 00:00:00 2001 From: Joe <1264204425@qq.com> Date: Sun, 30 Jun 2024 17:39:58 +0800 Subject: [PATCH 24/25] feat: patch dataset when not partial_member_list refresh datasetp permission --- api/controllers/console/datasets/datasets.py | 4 ++++ api/services/dataset_service.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index a4b5a00e1c1248..67c2a2081aff8e 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -229,6 +229,10 @@ def patch(self, dataset_id): DatasetPermissionService.update_partial_member_list(dataset_id_str, data.get('partial_member_list')) part_users_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str) result_data.update({'partial_member_list': part_users_list}) + else: + partial_member_list = [] + DatasetPermissionService.update_partial_member_list(dataset_id_str, partial_member_list) + result_data.update({'partial_member_list': partial_member_list}) return result_data, 200 diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index a6556549e11118..a88fdcc55e9ce3 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -94,7 +94,7 @@ def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, s max_per_page=100, error_out=False ) - # check datasets permission, assuming this function exists + # check datasets permission, if user and user.current_role != TenantAccountRole.DATASET_OPERATOR: datasets.items, datasets.total = DatasetService.filter_datasets_by_permission( user, datasets From 7c80d257e556a9af500160dc9af3848d0e2d854c Mon Sep 17 00:00:00 2001 From: Joe <1264204425@qq.com> Date: Sun, 30 Jun 2024 19:32:24 +0800 Subject: [PATCH 25/25] feat: dataset operator permission --- api/controllers/console/datasets/datasets.py | 7 ++-- .../console/datasets/datasets_document.py | 7 +++- api/models/account.py | 9 +++-- api/services/dataset_service.py | 37 +++++++++++++------ 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 67c2a2081aff8e..6d010b55aacad7 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -115,7 +115,7 @@ def post(self): args = parser.parse_args() # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator - if not current_user.is_dataset_editing: + if not current_user.is_editor: raise Forbidden() try: @@ -213,8 +213,9 @@ def patch(self, dataset_id): parser.add_argument('partial_member_list', type=list, location='json', help='Invalid parent user list.') args = parser.parse_args() data = request.get_json() + # The role of the current user in the ta table must be admin, owner, or editor - if not current_user.is_dataset_editing: + if not current_user.is_editor or current_user.is_dataset_operator: raise Forbidden() dataset = DatasetService.update_dataset( @@ -243,7 +244,7 @@ def delete(self, dataset_id): dataset_id_str = str(dataset_id) # The role of the current user in the ta table must be admin, owner, or editor - if not current_user.is_editor: + if not current_user.is_editor or 
current_user.is_dataset_operator: raise Forbidden() try: diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index b3a253c167768f..21523b0ed43e43 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -228,7 +228,7 @@ def post(self, dataset_id): raise NotFound('Dataset not found.') # The role of the current user in the ta table must be admin, owner, or editor - if not current_user.is_editor: + if not current_user.is_dataset_editor: raise Forbidden() try: @@ -294,6 +294,11 @@ def post(self): parser.add_argument('retrieval_model', type=dict, required=False, nullable=False, location='json') args = parser.parse_args() + + # The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator + if not current_user.is_dataset_editor: + raise Forbidden() + if args['indexing_technique'] == 'high_quality': try: model_manager = ModelManager() diff --git a/api/models/account.py b/api/models/account.py index 9187d053133fc9..23e7528d22fa67 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -115,9 +115,12 @@ def is_editor(self): return TenantAccountRole.is_editing_role(self._current_tenant.current_role) @property - def is_dataset_editing(self): - return TenantAccountRole.is_dataset_editing_role(self._current_tenant.current_role) + def is_dataset_editor(self): + return TenantAccountRole.is_dataset_edit_role(self._current_tenant.current_role) + @property + def is_dataset_operator(self): + return self._current_tenant.current_role == TenantAccountRole.DATASET_OPERATOR class TenantStatus(str, enum.Enum): NORMAL = 'normal' @@ -150,7 +153,7 @@ def is_editing_role(role: str) -> bool: return role and role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN, TenantAccountRole.EDITOR} @staticmethod - def is_dataset_editing_role(role: str) -> bool: + def is_dataset_edit_role(role: str) -> bool: return role and role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN, TenantAccountRole.EDITOR, TenantAccountRole.DATASET_OPERATOR} diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index a88fdcc55e9ce3..8b42c392e5ba90 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -71,7 +71,8 @@ def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, s permission_filter = db.or_( Dataset.created_by == user.id, Dataset.permission == 'all_team_members', - Dataset.permission == 'partial_members' + Dataset.permission == 'partial_members', + Dataset.permission == 'only_me' ) query = query.filter(permission_filter) else: @@ -94,6 +95,7 @@ def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, s max_per_page=100, error_out=False ) + # check datasets permission, if user and user.current_role != TenantAccountRole.DATASET_OPERATOR: datasets.items, datasets.total = DatasetService.filter_datasets_by_permission( @@ -310,6 +312,18 @@ def check_dataset_permission(dataset, user): 'You do not have permission to access this dataset.' 
) + @staticmethod + def check_dataset_operator_permission(user: Account = None, dataset: Dataset = None): + if dataset.permission == 'only_me' or dataset.permission == 'all_team_members': + if dataset.created_by != user.id: + raise NoPermissionError('You do not have permission to access this dataset.') + + elif dataset.permission == 'partial_members': + if not any( + dp.dataset_id == dataset.id for dp in DatasetPermission.query.filter_by(account_id=user.id).all() + ): + raise NoPermissionError('You do not have permission to access this dataset.') + @staticmethod def get_dataset_queries(dataset_id: str, page: int, per_page: int): dataset_queries = DatasetQuery.query.filter_by(dataset_id=dataset_id) \ @@ -327,17 +341,18 @@ def get_related_apps(dataset_id: str): @staticmethod def filter_datasets_by_permission(user, datasets): dataset_permission = DatasetPermission.query.filter_by(account_id=user.id).all() - if dataset_permission: - permitted_dataset_ids = {dp.dataset_id for dp in dataset_permission} - filtered_datasets = [dataset for dataset in datasets if - dataset.permission == 'all_team_members' or dataset.permission == 'only_me' or dataset.id in permitted_dataset_ids] - if filtered_datasets: - return filtered_datasets, len(filtered_datasets) - else: - all_members_visible_datasets = [dataset for dataset in datasets if dataset.permission == 'all_team_members'] - return all_members_visible_datasets, len(all_members_visible_datasets) + permitted_dataset_ids = {dp.dataset_id for dp in dataset_permission} if dataset_permission else set() + + filtered_datasets = [ + dataset for dataset in datasets if + (dataset.permission == 'all_team_members') or + (dataset.permission == 'only_me' and dataset.created_by == user.id) or + (dataset.id in permitted_dataset_ids) + ] + + filtered_count = len(filtered_datasets) - return [], 0 + return filtered_datasets, filtered_count class DocumentService: