diff --git a/.github/workflows/github-actions-test-simple.yml b/.github/workflows/github-actions-test-simple.yml index cdd37de7..f7692b09 100644 --- a/.github/workflows/github-actions-test-simple.yml +++ b/.github/workflows/github-actions-test-simple.yml @@ -37,7 +37,7 @@ jobs: pytest test_package.py - name: Upload test results - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: test-log path: ./tests/test.log diff --git a/.gitignore b/.gitignore index e4cd4659..bfb8aeb6 100644 --- a/.gitignore +++ b/.gitignore @@ -31,8 +31,8 @@ MANIFEST .DS_Store # unwanted files -src/team_comm_tools/features/lexicons/liwc_lexicons/ -src/team_comm_tools/features/lexicons/liwc_lexicons_small_test/ +src/team_comm_tools/features/lexicons/liwc_lexicons/* +src/team_comm_tools/features/lexicons/liwc_lexicons_small_test/* src/team_comm_tools/features/lexicons/certainty.txt src/team_comm_tools/modules/ src/team_comm_tools/output/* @@ -55,4 +55,7 @@ node_modules/ # testing /output /vector_data -test.py \ No newline at end of file +test.py + + + diff --git a/README.md b/README.md index 4a0cf58d..3ddc232d 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ my_feature_builder = FeatureBuilder( ) # this line of code runs the FeatureBuilder on your data -my_feature_builder.featurize(col="message") +my_feature_builder.featurize() ``` ### Data Format @@ -112,4 +112,4 @@ For more information, please refer to the [Introduction on our Read the Docs Pag Please visit our website, [https://teamcommtools.seas.upenn.edu/](https://teamcommtools.seas.upenn.edu/), for general information about our project and research. For more detailed documentation on our features and examples, please visit our [Read the Docs Page](https://conversational-featurizer.readthedocs.io/en/latest/). # Becoming a Contributor -If you would like to make pull requests to this open-sourced repository, please read our [GitHub Repo Getting Started Guide](/github_repo_getting_started.md). 
We welcome new feature contributions or improvements to our framework. \ No newline at end of file +If you would like to make pull requests to this open-sourced repository, please read our [GitHub Repo Getting Started Guide](/github_repo_getting_started.md). We welcome new feature contributions or improvements to our framework. diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index d217a715..8993482e 100644 Binary files a/docs/build/doctrees/environment.pickle and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/examples.doctree b/docs/build/doctrees/examples.doctree index c6f926b3..06563bc8 100644 Binary files a/docs/build/doctrees/examples.doctree and b/docs/build/doctrees/examples.doctree differ diff --git a/docs/build/doctrees/feature_builder.doctree b/docs/build/doctrees/feature_builder.doctree index c37bc53f..d1a34a70 100644 Binary files a/docs/build/doctrees/feature_builder.doctree and b/docs/build/doctrees/feature_builder.doctree differ diff --git a/docs/build/doctrees/features/basic_features.doctree b/docs/build/doctrees/features/basic_features.doctree index a06ddb4b..217b1cac 100644 Binary files a/docs/build/doctrees/features/basic_features.doctree and b/docs/build/doctrees/features/basic_features.doctree differ diff --git a/docs/build/doctrees/features/burstiness.doctree b/docs/build/doctrees/features/burstiness.doctree index f7649636..e0017583 100644 Binary files a/docs/build/doctrees/features/burstiness.doctree and b/docs/build/doctrees/features/burstiness.doctree differ diff --git a/docs/build/doctrees/features/certainty.doctree b/docs/build/doctrees/features/certainty.doctree index bee0a9a3..5f729c11 100644 Binary files a/docs/build/doctrees/features/certainty.doctree and b/docs/build/doctrees/features/certainty.doctree differ diff --git a/docs/build/doctrees/features/discursive_diversity.doctree b/docs/build/doctrees/features/discursive_diversity.doctree index 
c2820b98..bde9cf9a 100644 Binary files a/docs/build/doctrees/features/discursive_diversity.doctree and b/docs/build/doctrees/features/discursive_diversity.doctree differ diff --git a/docs/build/doctrees/features/fflow.doctree b/docs/build/doctrees/features/fflow.doctree index c6dd92c0..77d54954 100644 Binary files a/docs/build/doctrees/features/fflow.doctree and b/docs/build/doctrees/features/fflow.doctree differ diff --git a/docs/build/doctrees/features/get_all_DD_features.doctree b/docs/build/doctrees/features/get_all_DD_features.doctree index a29f36ce..a15d9bfa 100644 Binary files a/docs/build/doctrees/features/get_all_DD_features.doctree and b/docs/build/doctrees/features/get_all_DD_features.doctree differ diff --git a/docs/build/doctrees/features/get_user_network.doctree b/docs/build/doctrees/features/get_user_network.doctree index 05c38561..405b2038 100644 Binary files a/docs/build/doctrees/features/get_user_network.doctree and b/docs/build/doctrees/features/get_user_network.doctree differ diff --git a/docs/build/doctrees/features/hedge.doctree b/docs/build/doctrees/features/hedge.doctree index 8f8137cc..73f1afc7 100644 Binary files a/docs/build/doctrees/features/hedge.doctree and b/docs/build/doctrees/features/hedge.doctree differ diff --git a/docs/build/doctrees/features/index.doctree b/docs/build/doctrees/features/index.doctree index 90c91aa8..f8e916e8 100644 Binary files a/docs/build/doctrees/features/index.doctree and b/docs/build/doctrees/features/index.doctree differ diff --git a/docs/build/doctrees/features/info_exchange_zscore.doctree b/docs/build/doctrees/features/info_exchange_zscore.doctree index 5e84b167..31e4e4f9 100644 Binary files a/docs/build/doctrees/features/info_exchange_zscore.doctree and b/docs/build/doctrees/features/info_exchange_zscore.doctree differ diff --git a/docs/build/doctrees/features/information_diversity.doctree b/docs/build/doctrees/features/information_diversity.doctree index 77ed49fa..36085046 100644 Binary files 
a/docs/build/doctrees/features/information_diversity.doctree and b/docs/build/doctrees/features/information_diversity.doctree differ diff --git a/docs/build/doctrees/features/keywords.doctree b/docs/build/doctrees/features/keywords.doctree deleted file mode 100644 index e6228620..00000000 Binary files a/docs/build/doctrees/features/keywords.doctree and /dev/null differ diff --git a/docs/build/doctrees/features/lexical_features_v2.doctree b/docs/build/doctrees/features/lexical_features_v2.doctree index 8a15c6b8..4e0c869d 100644 Binary files a/docs/build/doctrees/features/lexical_features_v2.doctree and b/docs/build/doctrees/features/lexical_features_v2.doctree differ diff --git a/docs/build/doctrees/features/named_entity_recognition_features.doctree b/docs/build/doctrees/features/named_entity_recognition_features.doctree index 078d4fe7..55f5baad 100644 Binary files a/docs/build/doctrees/features/named_entity_recognition_features.doctree and b/docs/build/doctrees/features/named_entity_recognition_features.doctree differ diff --git a/docs/build/doctrees/features/other_lexical_features.doctree b/docs/build/doctrees/features/other_lexical_features.doctree index f4555625..fcb6a471 100644 Binary files a/docs/build/doctrees/features/other_lexical_features.doctree and b/docs/build/doctrees/features/other_lexical_features.doctree differ diff --git a/docs/build/doctrees/features/politeness_features.doctree b/docs/build/doctrees/features/politeness_features.doctree index 0ac0e8db..6b3321be 100644 Binary files a/docs/build/doctrees/features/politeness_features.doctree and b/docs/build/doctrees/features/politeness_features.doctree differ diff --git a/docs/build/doctrees/features/politeness_v2.doctree b/docs/build/doctrees/features/politeness_v2.doctree index 5bdccc6c..71889161 100644 Binary files a/docs/build/doctrees/features/politeness_v2.doctree and b/docs/build/doctrees/features/politeness_v2.doctree differ diff --git 
a/docs/build/doctrees/features/politeness_v2_helper.doctree b/docs/build/doctrees/features/politeness_v2_helper.doctree index f4e3d7a0..be8c27ad 100644 Binary files a/docs/build/doctrees/features/politeness_v2_helper.doctree and b/docs/build/doctrees/features/politeness_v2_helper.doctree differ diff --git a/docs/build/doctrees/features/question_num.doctree b/docs/build/doctrees/features/question_num.doctree index b6696c51..e7523935 100644 Binary files a/docs/build/doctrees/features/question_num.doctree and b/docs/build/doctrees/features/question_num.doctree differ diff --git a/docs/build/doctrees/features/readability.doctree b/docs/build/doctrees/features/readability.doctree index e23c9697..22fd1f5f 100644 Binary files a/docs/build/doctrees/features/readability.doctree and b/docs/build/doctrees/features/readability.doctree differ diff --git a/docs/build/doctrees/features/reddit_tags.doctree b/docs/build/doctrees/features/reddit_tags.doctree index 0afd4437..e9077c6a 100644 Binary files a/docs/build/doctrees/features/reddit_tags.doctree and b/docs/build/doctrees/features/reddit_tags.doctree differ diff --git a/docs/build/doctrees/features/temporal_features.doctree b/docs/build/doctrees/features/temporal_features.doctree index 414bc989..6cec6361 100644 Binary files a/docs/build/doctrees/features/temporal_features.doctree and b/docs/build/doctrees/features/temporal_features.doctree differ diff --git a/docs/build/doctrees/features/textblob_sentiment_analysis.doctree b/docs/build/doctrees/features/textblob_sentiment_analysis.doctree index b2152e7d..5b6b728c 100644 Binary files a/docs/build/doctrees/features/textblob_sentiment_analysis.doctree and b/docs/build/doctrees/features/textblob_sentiment_analysis.doctree differ diff --git a/docs/build/doctrees/features/turn_taking_features.doctree b/docs/build/doctrees/features/turn_taking_features.doctree index b81592e3..9e6183a2 100644 Binary files a/docs/build/doctrees/features/turn_taking_features.doctree and 
b/docs/build/doctrees/features/turn_taking_features.doctree differ diff --git a/docs/build/doctrees/features/user_centroids.doctree b/docs/build/doctrees/features/user_centroids.doctree deleted file mode 100644 index b1e8de00..00000000 Binary files a/docs/build/doctrees/features/user_centroids.doctree and /dev/null differ diff --git a/docs/build/doctrees/features/variance_in_DD.doctree b/docs/build/doctrees/features/variance_in_DD.doctree index f7ce3150..cadb3748 100644 Binary files a/docs/build/doctrees/features/variance_in_DD.doctree and b/docs/build/doctrees/features/variance_in_DD.doctree differ diff --git a/docs/build/doctrees/features/within_person_discursive_range.doctree b/docs/build/doctrees/features/within_person_discursive_range.doctree index 2e291340..ccbfcb96 100644 Binary files a/docs/build/doctrees/features/within_person_discursive_range.doctree and b/docs/build/doctrees/features/within_person_discursive_range.doctree differ diff --git a/docs/build/doctrees/features/word_mimicry.doctree b/docs/build/doctrees/features/word_mimicry.doctree index 03896436..30f058dc 100644 Binary files a/docs/build/doctrees/features/word_mimicry.doctree and b/docs/build/doctrees/features/word_mimicry.doctree differ diff --git a/docs/build/doctrees/features_conceptual/TEMPLATE.doctree b/docs/build/doctrees/features_conceptual/TEMPLATE.doctree index ff54dc0c..2c3e63df 100644 Binary files a/docs/build/doctrees/features_conceptual/TEMPLATE.doctree and b/docs/build/doctrees/features_conceptual/TEMPLATE.doctree differ diff --git a/docs/build/doctrees/features_conceptual/index.doctree b/docs/build/doctrees/features_conceptual/index.doctree index 2ac5c291..7106ee2c 100644 Binary files a/docs/build/doctrees/features_conceptual/index.doctree and b/docs/build/doctrees/features_conceptual/index.doctree differ diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree index c0eae14b..d3f131a1 100644 Binary files a/docs/build/doctrees/index.doctree and 
b/docs/build/doctrees/index.doctree differ diff --git a/docs/build/doctrees/intro.doctree b/docs/build/doctrees/intro.doctree index 95927eaf..8d872928 100644 Binary files a/docs/build/doctrees/intro.doctree and b/docs/build/doctrees/intro.doctree differ diff --git a/docs/build/doctrees/utils/assign_chunk_nums.doctree b/docs/build/doctrees/utils/assign_chunk_nums.doctree index 516654c3..6fc95f68 100644 Binary files a/docs/build/doctrees/utils/assign_chunk_nums.doctree and b/docs/build/doctrees/utils/assign_chunk_nums.doctree differ diff --git a/docs/build/doctrees/utils/calculate_chat_level_features.doctree b/docs/build/doctrees/utils/calculate_chat_level_features.doctree index abbd98f4..a3d61692 100644 Binary files a/docs/build/doctrees/utils/calculate_chat_level_features.doctree and b/docs/build/doctrees/utils/calculate_chat_level_features.doctree differ diff --git a/docs/build/doctrees/utils/calculate_conversation_level_features.doctree b/docs/build/doctrees/utils/calculate_conversation_level_features.doctree index b14b6cdc..f4b51cf2 100644 Binary files a/docs/build/doctrees/utils/calculate_conversation_level_features.doctree and b/docs/build/doctrees/utils/calculate_conversation_level_features.doctree differ diff --git a/docs/build/doctrees/utils/calculate_user_level_features.doctree b/docs/build/doctrees/utils/calculate_user_level_features.doctree index cd7b16e4..d52241cf 100644 Binary files a/docs/build/doctrees/utils/calculate_user_level_features.doctree and b/docs/build/doctrees/utils/calculate_user_level_features.doctree differ diff --git a/docs/build/doctrees/utils/check_embeddings.doctree b/docs/build/doctrees/utils/check_embeddings.doctree index ad89c30f..76a641b7 100644 Binary files a/docs/build/doctrees/utils/check_embeddings.doctree and b/docs/build/doctrees/utils/check_embeddings.doctree differ diff --git a/docs/build/doctrees/utils/gini_coefficient.doctree b/docs/build/doctrees/utils/gini_coefficient.doctree index 5a65346e..f42249e5 100644 Binary 
files a/docs/build/doctrees/utils/gini_coefficient.doctree and b/docs/build/doctrees/utils/gini_coefficient.doctree differ diff --git a/docs/build/doctrees/utils/preload_word_lists.doctree b/docs/build/doctrees/utils/preload_word_lists.doctree index 97b8ec06..89531856 100644 Binary files a/docs/build/doctrees/utils/preload_word_lists.doctree and b/docs/build/doctrees/utils/preload_word_lists.doctree differ diff --git a/docs/build/doctrees/utils/preprocess.doctree b/docs/build/doctrees/utils/preprocess.doctree index a7195d5a..596edc00 100644 Binary files a/docs/build/doctrees/utils/preprocess.doctree and b/docs/build/doctrees/utils/preprocess.doctree differ diff --git a/docs/build/doctrees/utils/summarize_features.doctree b/docs/build/doctrees/utils/summarize_features.doctree index 0dff5811..5d3249f2 100644 Binary files a/docs/build/doctrees/utils/summarize_features.doctree and b/docs/build/doctrees/utils/summarize_features.doctree differ diff --git a/docs/build/doctrees/utils/zscore_chats_and_conversation.doctree b/docs/build/doctrees/utils/zscore_chats_and_conversation.doctree index cf57ff68..42f1f4a4 100644 Binary files a/docs/build/doctrees/utils/zscore_chats_and_conversation.doctree and b/docs/build/doctrees/utils/zscore_chats_and_conversation.doctree differ diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo index 906d30ca..5ac8e945 100644 --- a/docs/build/html/.buildinfo +++ b/docs/build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 9a01a2cd3d4384710101b4a99edd7683 +config: d7678f479036f3220c73480ec4f2c467 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/html/_sources/examples.rst.txt b/docs/build/html/_sources/examples.rst.txt index e50ca58d..b65bc187 100644 --- a/docs/build/html/_sources/examples.rst.txt +++ b/docs/build/html/_sources/examples.rst.txt @@ -1,9 +1,16 @@ .. 
_examples: -Examples -============= +Worked Example +================ -**Note:** Our "Examples" page is constantly being improved. This page is a work in progress! +Demo / Sample Code +******************* + +After following the "Getting Started" steps below, the Team Communication Toolkit can be imported at the top of any Python script. We have provided a simple example file, "featurize.py", and a demo notebook, "demo.ipynb," under our `examples folder `_ on GitHub. + +You can also `access our demo notebook on Google Colab `_, where you can make a copy and run it on your own. + +Finally, this page will walk you through a case study, highlighting top use cases and considerations when using the toolkit. Getting Started **************** @@ -27,6 +34,8 @@ In the event that some dependency installations fail (for example, you may get a If you encounter a further issue in which the 'wordnet' package from NLTK is not found, it may be related to a known bug in NLTK in which the wordnet package does not unzip automatically. If this is the case, please follow the instructions to manually unzip it, documented in `this thread `_. +You can also find a full list of our requirements `here `_. + Import Recommendations: Virtual Environment and Pip +++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -34,11 +43,6 @@ Import Recommendations: Virtual Environment and Pip **We also strongly recommend that your version of pip is up-to-date (>=24.0).** There have been reports in which users have had trouble downloading dependencies (specifically, the Spacy package) with older versions of pip. If you get an error with downloading ``en_core_web_sm``, we recommend updating pip. -Using the Package -****************** - -After you install it, the Team Communication Toolkit can be imported at the top of any Python script. We have provided a simple example file, "featurize.py", under our `examples folder `_ on GitHub, and this walkthrough will highlight some of our top use cases. 
However, it won't follow the file exactly. - Importing the Package ++++++++++++++++++++++ @@ -52,10 +56,15 @@ Now you have access to the :ref:`feature_builder`. This is the main class that y *Note*: PyPI treats hyphens and underscores equally, so "pip install team_comm_tools" and "pip install team-comm-tools" are equivalent. However, Python does NOT treat them equally, and **you should use underscores when you import the package, like this: from team_comm_tools import FeatureBuilder**. -Running the FeatureBuilder on Your Data -++++++++++++++++++++++++++++++++++++++++ +Walkthrough: Running the FeatureBuilder on Your Data +***************************************************** + +Next, we'll go through the details of running the FeatureBuilder on your data, discussing each of the specific options / parameters at your disposal. + +Configuring the FeatureBuilder +++++++++++++++++++++++++++++++++ -Next, you'll want to get some data to run your FeatureBuilder on! The FeatureBuilder accepts any Pandas DataFrame as the input, so you can read in data in whatever format you like. For the purposes of this walkthrough, we'll be using some jury deliberation data from `Hu et al. (2021) `_. +The FeatureBuilder accepts any Pandas DataFrame as the input, so you can read in data in whatever format you like. For the purposes of this walkthrough, we'll be using some jury deliberation data from `Hu et al. (2021) `_. We first import Pandas and read in the dataframe: @@ -81,7 +90,7 @@ Now we are ready to call the FeatureBuilder on our data. 
All we need to do is de output_file_path_conv_level = "./jury_output_conversation_level.csv", turns = True ) - jury_feature_builder.featurize(col="message") + jury_feature_builder.featurize() Basic Input Columns ^^^^^^^^^^^^^^^^^^^^ @@ -106,7 +115,7 @@ Basic Input Columns timestamp_col = ("timestamp_start", "timestamp_end") -* **In the FeatureBuilder, we assume that every conversation has a unique identifying string, and that all the messages belonging to the same conversation have the same identifier.** Typically, we would use the column **conversation_id_col** to indicate the name of this identifier. However, we also support cases in which there is more than one identifer per conversation, and our example here illustrates this functionality. The **grouping_keys** parameter means that we want to group by more than one column, and allow the FeatureBuilder to treat unique combinations of the grouping keys as the "conversational identifier". This means that we treat each unique combination of "batch_num" and "round_num" as a different conversation. +* **In the FeatureBuilder, we assume that every conversation has a unique identifying string, and that all the messages belonging to the same conversation have the same identifier.** Typically, we would use the column **conversation_id_col** to indicate the name of this identifier. However, we also support cases in which there is more than one identifier per conversation, and our example here illustrates this functionality. The **grouping_keys** parameter means that we want to group by more than one column, and allow the FeatureBuilder to treat unique combinations of the grouping keys as the "conversational identifier". This means that we treat each unique combination of "batch_num" and "round_num" as a different conversation, and we *override* the **conversation_id_col** if a list of **grouping_keys** is present. 
* In cases where you are using **conversation_id_col**, "conversation_num" is the default value for this parameter. @@ -162,7 +171,7 @@ Basic Input Columns * These messages by John can be thought of as a single turn, in which he says, "Hey Michael, how are you? I wanted to talk to you real quick!" Instead, however, John sent three messages in a row, suggesting that he took three "turns." When the **turns** parameter is set to True, the FeatureBuilder will automatically combine messages like this into a single "turn." - * We note, however, that one of our features (`:ref:turn_taking_index`) will always give the value of "1" in the case when you set **turns=True**, since, by definition, people will never take multiple "turns" in a row. + * We note, however, that one of our features (:ref:`turn_taking_index`) will always give the value of "1" in the case when you set **turns=True**, since, by definition, people will never take multiple "turns" in a row. Advanced Configuration Columns diff --git a/docs/build/html/_sources/features/index.rst.txt b/docs/build/html/_sources/features/index.rst.txt index 4db47202..1f67bc29 100644 --- a/docs/build/html/_sources/features/index.rst.txt +++ b/docs/build/html/_sources/features/index.rst.txt @@ -48,4 +48,12 @@ Once utterance-level features are computed, we compute conversation-level featur Speaker- (User) Level Features ********************************* -User-level features currently represent an aggregation of features at the utterance- level (for example, the average number of words spoken *by a particular user*). There is therefore no separate speaker-level feature documentation; you may reference the :ref:`Speaker (User)-Level Features Page ` for more information. +User-level features generally represent an aggregation of features at the utterance- level (for example, the average number of words spoken *by a particular user*). 
There is therefore limited speaker-level feature documentation, other than a function used to compute the "network" of other speakers that an individual interacts with in a conversation. + +You may reference the :ref:`Speaker (User)-Level Features Page ` for more information. + + +.. toctree:: + :maxdepth: 1 + + get_user_network \ No newline at end of file diff --git a/docs/build/html/_sources/features/keywords.rst.txt b/docs/build/html/_sources/features/keywords.rst.txt deleted file mode 100644 index 14473088..00000000 --- a/docs/build/html/_sources/features/keywords.rst.txt +++ /dev/null @@ -1,7 +0,0 @@ -keywords module -=============== - -.. automodule:: keywords - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/build/html/_sources/features/user_centroids.rst.txt b/docs/build/html/_sources/features/user_centroids.rst.txt deleted file mode 100644 index 7580fef4..00000000 --- a/docs/build/html/_sources/features/user_centroids.rst.txt +++ /dev/null @@ -1,7 +0,0 @@ -user\_centroids module -====================== - -.. automodule:: features.user_centroids - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/build/html/_sources/features_conceptual/TEMPLATE.rst.txt b/docs/build/html/_sources/features_conceptual/TEMPLATE.rst.txt index ee90ab2a..4ad99637 100644 --- a/docs/build/html/_sources/features_conceptual/TEMPLATE.rst.txt +++ b/docs/build/html/_sources/features_conceptual/TEMPLATE.rst.txt @@ -1,4 +1,4 @@ - .. _TEMPLATE: +.. 
_TEMPLATE: FEATURE NAME ============ diff --git a/docs/build/html/_sources/features_conceptual/index.rst.txt b/docs/build/html/_sources/features_conceptual/index.rst.txt index b4344357..38a7a321 100644 --- a/docs/build/html/_sources/features_conceptual/index.rst.txt +++ b/docs/build/html/_sources/features_conceptual/index.rst.txt @@ -5,8 +5,6 @@ Features: Conceptual Documentation In contrast with the :ref:`Features: Technical Documentation ` page, this page aims to provide a resource for conceptually understanding the features: what are they, what are they meant to measure, and how is our operationalization connected to concepts from social science? -**Please note that this page is currently under construction.** - Utterance- (Chat) Level Features ********************************* @@ -14,6 +12,9 @@ Utterance- (Chat) Level Features :maxdepth: 1 named_entity_recognition + time_difference + liwc + certainty information_exchange proportion_of_first_person_pronouns message_length @@ -28,11 +29,14 @@ Utterance- (Chat) Level Features function_word_accommodation mimicry_bert moving_mimicry - time_difference + forward_flow hedge + questions + conversational_repair politeness_strategies politeness_receptiveness_markers online_discussions_tags + Conversation-Level Features **************************** @@ -40,4 +44,8 @@ Conversation-Level Features .. toctree:: :maxdepth: 1 - turn_taking_index \ No newline at end of file + gini_coefficient + turn_taking_index + team_burstiness + discursive_diversity + information_diversity \ No newline at end of file diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt index 0e2eadbd..f17dbd8b 100644 --- a/docs/build/html/_sources/index.rst.txt +++ b/docs/build/html/_sources/index.rst.txt @@ -1,7 +1,4 @@ -.. Team Communication Toolkit documentation master file, created by - sphinx-quickstart on Fri Jun 14 12:54:37 2024. 
- You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +.. _index_main: The Team Communication Toolkit =============================== @@ -79,15 +76,16 @@ Once you import the tool, you will be able to declare a FeatureBuilder object, w ) # this line of code runs the FeatureBuilder on your data - my_feature_builder.featurize(col="message") + my_feature_builder.featurize() -Use the Table of Contents below to learn more about our tool. We recommend that you begin in the "Introduction" section, then explore other sections of the documentation as they become relevant to you. More information on using our tool can be found in :ref:`examples`. +Use the Table of Contents below to learn more about our tool. We recommend that you begin in the "Introduction" section, then explore other sections of the documentation as they become relevant to you. We recommend reading :ref:`basics` for a high-level overview of the requirements and parameters, and then reading through the :ref:`examples` for a detailed walkthrough and discussion of considerations. .. toctree:: :maxdepth: 2 :caption: Contents: intro + basics feature_builder features/index features_conceptual/index diff --git a/docs/build/html/_sources/intro.rst.txt b/docs/build/html/_sources/intro.rst.txt index d262bcd7..c90d53b2 100644 --- a/docs/build/html/_sources/intro.rst.txt +++ b/docs/build/html/_sources/intro.rst.txt @@ -20,7 +20,7 @@ Finally, even when researchers build and measure their own features, what happen What if there existed a single package did it all for you? What if, instead of combing through the literature, deciding on constructs of interest, and putting together packages to build out features on your own, a vast (and ever-increasing!) collection of conversational attributes was readily available at your fingertips? -We introduce the **Team Communication Toolkit**: a "one-stop shop" for exploring conversational data. 
Our framework is a single package encompassing a variety of common, research-backed measures of communication. These include tools like `LIWC `_, `Convokit `_, `The Conversational Receptiveness Package `_, `The Lexical Suite `_ and much more. If you are working with conversational data for the first time, or just seeking to understand what you can possibly learn from open-ended conversations, this is the right place for you. We have collected over 100 features that you can explore, so that researchers can spend more time learning from conversations and less time worrying about how to begin studying them. +We introduce the **Team Communication Toolkit**: a "one-stop shop" for exploring conversational data. Our framework is a single package encompassing a variety of common, research-backed measures of communication. These include tools like `LIWC `_, `ConvoKit `_, `The Conversational Receptiveness Package `_, `The Lexical Suite `_ and much more. If you are working with conversational data for the first time, or just seeking to understand what you can possibly learn from open-ended conversations, this is the right place for you. We have collected over 100 features that you can explore, so that researchers can spend more time learning from conversations and less time worrying about how to begin studying them. The FeatureBuilder ******************* @@ -54,6 +54,10 @@ The three levels of analysis are closely interconnected. In the Toolkit, Utteran The driving functions for generating features at different levels are located in the :ref:`Utilities `. In general, you do not have to directly interact with these utilties, as the Toolkit generates utterance-, speaker-, and conversational-level features by default. However, you (as a researcher) may only only be interested a subset of the outputs, and customizable options will be made avilable in the FeatureBuilder soon. +Getting Started +***************** +Please refer to the :ref:`index_main` to get started. 
From there, we recommend reading :ref:`basics` for a high-level overview of the requirements and parameters, and then reading through the :ref:`examples` for a detailed walkthrough and discussion of considerations. + Feature Documentation ********************** For technical information on the features generated by our Toolkit, please refer to the :ref:`Features: Technical Documentation ` page. diff --git a/docs/build/html/_static/searchtools.js b/docs/build/html/_static/searchtools.js index 92da3f8b..b08d58c9 100644 --- a/docs/build/html/_static/searchtools.js +++ b/docs/build/html/_static/searchtools.js @@ -178,7 +178,7 @@ const Search = { htmlToText: (htmlString, anchor) => { const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); - for (const removalQuery of [".headerlinks", "script", "style"]) { + for (const removalQuery of [".headerlink", "script", "style"]) { htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); } if (anchor) { @@ -328,13 +328,14 @@ const Search = { for (const [title, foundTitles] of Object.entries(allTitles)) { if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { for (const [file, id] of foundTitles) { - let score = Math.round(100 * queryLower.length / title.length) + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles normalResults.push([ docNames[file], titles[file] !== title ? `${titles[file]} > ${title}` : title, id !== null ? 
"#" + id : "", null, - score, + score + boost, filenames[file], ]); } diff --git a/docs/build/html/examples.html b/docs/build/html/examples.html index da44669a..82a0c604 100644 --- a/docs/build/html/examples.html +++ b/docs/build/html/examples.html @@ -4,7 +4,7 @@ - Examples — Team Communication Toolkit 0.1.1 documentation + Worked Example — Team Communication Toolkit 0.1.1 documentation @@ -22,7 +22,7 @@ - + @@ -47,18 +47,20 @@

Contents:

  • Introduction
  • +
  • The Basics
  • feature_builder module
  • Features: Technical Documentation
  • Features: Conceptual Documentation
  • -
  • Examples
      +
    • Worked Example
        +
      • Demo / Sample Code
      • Getting Started
      • -
      • Using the Package
          -
        • Importing the Package
        • -
        • Running the FeatureBuilder on Your Data
            +
          • Walkthrough: Running the FeatureBuilder on Your Data
              +
            • Configuring the FeatureBuilder @@ -85,7 +87,7 @@
            • @@ -372,7 +380,7 @@

              Additional FeatureBuilder Considerations - + diff --git a/docs/build/html/feature_builder.html b/docs/build/html/feature_builder.html index 6a1baf14..42dec2fe 100644 --- a/docs/build/html/feature_builder.html +++ b/docs/build/html/feature_builder.html @@ -22,7 +22,7 @@ - + @@ -47,6 +47,7 @@

              Contents:

              • Introduction
              • +
              • The Basics
              • feature_builder module
                • FeatureBuilder @@ -173,22 +174,19 @@
                  -featurize(col: str = 'message') None
                  +featurize() None

                  Main driver function for feature generation.

                  This function creates chat-level features, generates features for different truncation percentages of the data if specified, and produces user-level and conversation-level features. Finally, the features are saved into the designated output files.

                  -
                  Parameters:
                  -

                  col (str, optional) – Column to preprocess, defaults to “message”

                  +
                  Returns:
                  +

                  None

                  -
                  Returns:
                  +
                  Return type:

                  None

                  -
                  Return type:
                  -

                  None

                  -
                  @@ -308,7 +306,7 @@