Watts-Lab · pradnayapathak · Oct 22, 2024 · Nov 12, 2024 · Nov 21, 2024 · Dec 1, 2024
diff --git a/.gitignore b/.gitignore
@@ -41,6 +41,7 @@ src/team_comm_tools/ipython_notebooks/.ipynb_checkpoints/
 tests/ipython_notebooks/.ipynb_checkpoints/
 tests/data/vector_data/
 tests/test.log
+tests/helper.ipynb
 tests/output/*
 tests/vector_data/*
 src/utils/__pycache__/

diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle
diff --git a/docs/build/doctrees/examples.doctree b/docs/build/doctrees/examples.doctree
diff --git a/docs/build/doctrees/feature_builder.doctree b/docs/build/doctrees/feature_builder.doctree
diff --git a/docs/build/doctrees/features/basic_features.doctree b/docs/build/doctrees/features/basic_features.doctree
diff --git a/docs/build/doctrees/features/burstiness.doctree b/docs/build/doctrees/features/burstiness.doctree
diff --git a/docs/build/doctrees/features/certainty.doctree b/docs/build/doctrees/features/certainty.doctree
diff --git a/docs/build/doctrees/features/discursive_diversity.doctree b/docs/build/doctrees/features/discursive_diversity.doctree
diff --git a/docs/build/doctrees/features/fflow.doctree b/docs/build/doctrees/features/fflow.doctree
diff --git a/docs/build/doctrees/features/get_all_DD_features.doctree b/docs/build/doctrees/features/get_all_DD_features.doctree
diff --git a/docs/build/doctrees/features/get_user_network.doctree b/docs/build/doctrees/features/get_user_network.doctree
diff --git a/docs/build/doctrees/features/hedge.doctree b/docs/build/doctrees/features/hedge.doctree
diff --git a/docs/build/doctrees/features/index.doctree b/docs/build/doctrees/features/index.doctree
diff --git a/docs/build/doctrees/features/info_exchange_zscore.doctree b/docs/build/doctrees/features/info_exchange_zscore.doctree
diff --git a/docs/build/doctrees/features/information_diversity.doctree b/docs/build/doctrees/features/information_diversity.doctree
diff --git a/docs/build/doctrees/features/lexical_features_v2.doctree b/docs/build/doctrees/features/lexical_features_v2.doctree
diff --git a/docs/build/doctrees/features/named_entity_recognition_features.doctree b/docs/build/doctrees/features/named_entity_recognition_features.doctree
diff --git a/docs/build/doctrees/features/other_lexical_features.doctree b/docs/build/doctrees/features/other_lexical_features.doctree
diff --git a/docs/build/doctrees/features/politeness_features.doctree b/docs/build/doctrees/features/politeness_features.doctree
diff --git a/docs/build/doctrees/features/politeness_v2.doctree b/docs/build/doctrees/features/politeness_v2.doctree
diff --git a/docs/build/doctrees/features/politeness_v2_helper.doctree b/docs/build/doctrees/features/politeness_v2_helper.doctree
diff --git a/docs/build/doctrees/features/question_num.doctree b/docs/build/doctrees/features/question_num.doctree
diff --git a/docs/build/doctrees/features/readability.doctree b/docs/build/doctrees/features/readability.doctree
diff --git a/docs/build/doctrees/features/reddit_tags.doctree b/docs/build/doctrees/features/reddit_tags.doctree
diff --git a/docs/build/doctrees/features/temporal_features.doctree b/docs/build/doctrees/features/temporal_features.doctree
diff --git a/docs/build/doctrees/features/textblob_sentiment_analysis.doctree b/docs/build/doctrees/features/textblob_sentiment_analysis.doctree
diff --git a/docs/build/doctrees/features/turn_taking_features.doctree b/docs/build/doctrees/features/turn_taking_features.doctree
diff --git a/docs/build/doctrees/features/variance_in_DD.doctree b/docs/build/doctrees/features/variance_in_DD.doctree
diff --git a/docs/build/doctrees/features/within_person_discursive_range.doctree b/docs/build/doctrees/features/within_person_discursive_range.doctree
diff --git a/docs/build/doctrees/features/word_mimicry.doctree b/docs/build/doctrees/features/word_mimicry.doctree
diff --git a/docs/build/doctrees/features_conceptual/TEMPLATE.doctree b/docs/build/doctrees/features_conceptual/TEMPLATE.doctree
diff --git a/docs/build/doctrees/features_conceptual/content_word_accommodation.doctree b/docs/build/doctrees/features_conceptual/content_word_accommodation.doctree
diff --git a/docs/build/doctrees/features_conceptual/function_word_accommodation.doctree b/docs/build/doctrees/features_conceptual/function_word_accommodation.doctree
diff --git a/docs/build/doctrees/features_conceptual/index.doctree b/docs/build/doctrees/features_conceptual/index.doctree
diff --git a/docs/build/doctrees/features_conceptual/mimicry_bert.doctree b/docs/build/doctrees/features_conceptual/mimicry_bert.doctree
diff --git a/docs/build/doctrees/features_conceptual/moving_mimicry.doctree b/docs/build/doctrees/features_conceptual/moving_mimicry.doctree
diff --git a/docs/build/doctrees/features_conceptual/named_entity_recognition.doctree b/docs/build/doctrees/features_conceptual/named_entity_recognition.doctree
diff --git a/docs/build/doctrees/features_conceptual/positivity_bert.doctree b/docs/build/doctrees/features_conceptual/positivity_bert.doctree
diff --git a/docs/build/doctrees/features_conceptual/turn_taking_index.doctree b/docs/build/doctrees/features_conceptual/turn_taking_index.doctree
diff --git a/docs/build/doctrees/features_conceptual/word_ttr.doctree b/docs/build/doctrees/features_conceptual/word_ttr.doctree
diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree
diff --git a/docs/build/doctrees/intro.doctree b/docs/build/doctrees/intro.doctree
diff --git a/docs/build/doctrees/utils/assign_chunk_nums.doctree b/docs/build/doctrees/utils/assign_chunk_nums.doctree
diff --git a/docs/build/doctrees/utils/calculate_chat_level_features.doctree b/docs/build/doctrees/utils/calculate_chat_level_features.doctree
diff --git a/docs/build/doctrees/utils/calculate_conversation_level_features.doctree b/docs/build/doctrees/utils/calculate_conversation_level_features.doctree
diff --git a/docs/build/doctrees/utils/calculate_user_level_features.doctree b/docs/build/doctrees/utils/calculate_user_level_features.doctree
diff --git a/docs/build/doctrees/utils/check_embeddings.doctree b/docs/build/doctrees/utils/check_embeddings.doctree
diff --git a/docs/build/doctrees/utils/gini_coefficient.doctree b/docs/build/doctrees/utils/gini_coefficient.doctree
diff --git a/docs/build/doctrees/utils/index.doctree b/docs/build/doctrees/utils/index.doctree
diff --git a/docs/build/doctrees/utils/preload_word_lists.doctree b/docs/build/doctrees/utils/preload_word_lists.doctree
diff --git a/docs/build/doctrees/utils/preprocess.doctree b/docs/build/doctrees/utils/preprocess.doctree
diff --git a/docs/build/doctrees/utils/summarize_features.doctree b/docs/build/doctrees/utils/summarize_features.doctree
diff --git a/docs/build/doctrees/utils/zscore_chats_and_conversation.doctree b/docs/build/doctrees/utils/zscore_chats_and_conversation.doctree
diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 9a01a2cd3d4384710101b4a99edd7683
+config: d7678f479036f3220c73480ec4f2c467
 tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/docs/build/html/_sources/examples.rst.txt b/docs/build/html/_sources/examples.rst.txt
diff --git a/docs/build/html/_sources/features/index.rst.txt b/docs/build/html/_sources/features/index.rst.txt
@@ -32,7 +32,11 @@ Utterance-Level features are calculated *first* in the Toolkit, as many conversa
 
 Conversation-Level Features
 ****************************
-Once utterance-level features are computed, we compute conversation-level features; some of these features represent an aggregation of utterance-level information (for example, the "average level of positivity" in a conversation is simply the mean positivity score for each utterance). Other conversation-level features are constructs that are defined only at the conversation-level, such as the level of "burstiness" in a team's communication patterns.
+
+Base Conversation-Level Features
++++++++++++++++++++++++++++++++++++
+
+The following features are constructs that are defined only at the conversation-level, such as the level of "burstiness" in a team's communication patterns. We call these the "base" conversation-level features, and they can be accessed using a property of the ``FeatureBuilder`` object: ``FeatureBuilder.conv_features_base``.
 
 .. toctree::
    :maxdepth: 1
@@ -45,13 +49,19 @@ Once utterance-level features are computed, we compute conversation-level featur
    variance_in_DD
    within_person_discursive_range
    turn_taking_features
+   lsm
+
+Conversation-Level Aggregates
++++++++++++++++++++++++++++++++++++
+Once utterance-level features are computed, we compute conversation-level features; some of these features represent an aggregation of utterance-level information (for example, the "average level of positivity" in a conversation is simply the mean positivity score for each utterance).
+
+By default, all numeric attributes generated at the utterance (chat) level are aggregated using the functions ``mean``, ``max``, ``min``, and ``stdev``. However, this behavior can be customized, with details in the Worked Example (see :ref:`custom_aggregation`).
 
 Speaker- (User) Level Features
 *********************************
 User-level features generally represent an aggregation of features at the utterance- level (for example, the average number of words spoken *by a particular user*). There is therefore limited speaker-level feature documentation, other than a function used to compute the "network" of other speakers that an individual interacts with in a conversation.
 
-You may reference the :ref:`Speaker (User)-Level Features Page <user_level_features>` for more information.
-
+You may reference the :ref:`Speaker (User)-Level Features Page <user_level_features>` for more information, as well as the details in the Worked Example (see :ref:`custom_aggregation`).
 
 .. toctree::
    :maxdepth: 1

diff --git a/docs/build/html/_sources/features_conceptual/content_word_accommodation.rst.txt b/docs/build/html/_sources/features_conceptual/content_word_accommodation.rst.txt
@@ -13,10 +13,16 @@ Citation
 
 Implementation Basics 
 **********************
-To compute the feature, we count the number of shared content words (defined as anything that is not on the function word list) between the current and previous utterance in a conversation, then normalize it by the frequency of the word across all inputs in the dataset. This follows the original authors' method:
+To compute the feature, we count the number of shared content words (defined as anything that is not on the function word list) between the current and previous utterance in a conversation, normalized by the frequency at which the word appears. This follows the original authors' method:
 
 	Content words are defined as any word that is not a function word. For each content word w in a given speaker’s turn, if w also occurs in the immediately preceding turn of the other, we count w as an accommodated content word. The raw count of accommodated content words is be the total number of these accommodated content words over every turn in the conversation side. Because content words vary widely in frequency, we normalized our counts by the frequency of each word.
 
+For completeness, we interpret "the frequency of each word" in two distinct ways:
+
+1. **The frequency of each word across the entire dataset (``content_word_accommodation``)**: here, we normalize non-function words with respect to the language used across all conversations in the dataset. This version of accommodation is useful if the entire dataset consists of similar conversations, or conversations about the same topic. Normalizing with respect to a larger dataset will be useful in establishing better estimates when identifying (and appropriately weighting) which words carry meaningful content in a particular domain.
+
+2. **The frequency of each word within a given conversation (``content_word_accommodation_per_conv``)**: here, we normalize non-function words with respect only to the language in a given conversation. This version of accommodation is useful if the dataset consists of very distinct conversations, for which it may not make sense to assume that the distribution of which words are "important" will hold across different domains.
+
 The feature requires a reference list of function words, which are defined by the original authors as follows.
 
 **Auxiliary and copular verbs**

diff --git a/docs/build/html/_sources/features_conceptual/index.rst.txt b/docs/build/html/_sources/features_conceptual/index.rst.txt
@@ -48,4 +48,5 @@ Conversation-Level Features
    turn_taking_index
    team_burstiness
    discursive_diversity
-   information_diversity
+   information_diversity
+   lsm
diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt
@@ -150,10 +150,10 @@ Use the Table of Contents below to learn more about our tool. We recommend that
 
    intro
    basics
-   feature_builder
+   examples
    features/index
    features_conceptual/index
-   examples
+   feature_builder
    utils/index
 
 Indices and Tables

diff --git a/docs/build/html/_static/searchtools.js b/docs/build/html/_static/searchtools.js
@@ -178,7 +178,7 @@ const Search = {
 
   htmlToText: (htmlString, anchor) => {
     const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html');
-    for (const removalQuery of [".headerlinks", "script", "style"]) {
+    for (const removalQuery of [".headerlink", "script", "style"]) {
       htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() });
     }
     if (anchor) {
@@ -328,13 +328,14 @@ const Search = {
     for (const [title, foundTitles] of Object.entries(allTitles)) {
       if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) {
         for (const [file, id] of foundTitles) {
-          let score = Math.round(100 * queryLower.length / title.length)
+          const score = Math.round(Scorer.title * queryLower.length / title.length);
+          const boost = titles[file] === title ? 1 : 0;  // add a boost for document titles
           normalResults.push([
             docNames[file],
             titles[file] !== title ? `${titles[file]} > ${title}` : title,
             id !== null ? "#" + id : "",
             null,
-            score,
+            score + boost,
             filenames[file],
           ]);
         }