contributing guidelines (#1)
organizejs authored Apr 2, 2019
1 parent 65cd104 commit 62beb74
Showing 4 changed files with 78 additions and 19 deletions.
50 changes: 50 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,50 @@
# Contribution Guidelines

One of the goals of this repo is to make sure that the data is as up-to-date as possible, so please feel free to make contributions!

To contribute, please use pull requests to add to the repo so that new additions can be checked for formatting, accuracy and validity.

## Steps to Contributing
Here are the basic steps for adding a new action using a pull request:
1. Edit the README.md on GitHub.com.
1. The master branch is protected, so you won't be able to merge directly into
   it. Instead, create a new branch, with the name of the event you want to add
   in the branch title.
1. Make a pull request from the branch you've created into the master branch.

When making an update to the README, use the HTML snippet provided below to add a
row to the table.
```html
<tr data-author="organizejs">
  <td data-column="action">
    Union representation
  </td>
  <td data-column="company">
    Facebook
  </td>
  <td data-column="date">
    2017-07-24 00:00:00
  </td>
  <td data-column="employment_type">
    Vendor
  </td>
  <td data-column="source">
    http://unitehere.org/press-releases/cafeteria-workers-at-facebook-unionize-continuing-movement-for-a-more-inclusive-silicon-valley/
  </td>
  <td data-column="struggle_type">
    Wages, Health benefits
  </td>
  <td data-column="union_affiliation">
    Unite Here Local 19
  </td>
  <td data-column="worker_count">
    500
  </td>
</tr>
```
Please note the following aspects:
- If you would like your GitHub username affiliated with the action you added,
  add a `data-author` attribute to the `<tr>` tag with your GitHub username.
  If you wish to remain anonymous, skip this step.
- Each `<td>` tag must have a `data-column` attribute naming its associated
  column.
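
For reference, here is a minimal sketch (not part of this commit) of how such a row can be read programmatically with BeautifulSoup, which the repo's utilities already use: each `data-column` cell becomes a field, and the optional `data-author` attribute is read from the `<tr>` tag. The sample row and variable names below are illustrative only.
```python
# Illustrative only: parse one contributed <tr> row into a plain dict.
from bs4 import BeautifulSoup

row_html = """
<tr data-author="organizejs">
  <td data-column="company">Facebook</td>
  <td data-column="worker_count">500</td>
</tr>
"""

tr = BeautifulSoup(row_html, "html.parser").tr
record = {td["data-column"]: td.get_text(strip=True) for td in tr.find_all("td")}
record["author"] = tr.get("data-author")  # stays None for anonymous contributions
print(record)  # {'company': 'Facebook', 'worker_count': '500', 'author': 'organizejs'}
```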
12 changes: 6 additions & 6 deletions README.md
@@ -25,9 +25,9 @@ For the data to be useful, we'll want to make sure we're collecting enough data
| date | While the date may seem trivial, collecting dates may be complex for multi-day events such as pickets or online petitions. In this repo, we collect only the start date in the format YYYY-MM-DD. |
| company | Since worker power is usually used to demand changes from a company, we want to make sure we capture the company in our dataset. Some moments of worker power may not have an associated company. For example, online petitions or protests against the president may consist of workers from an amalgamation of companies, in which case we can leave this field blank. |
| action | What was the form of the action that took place? |
| employment type | What was the employment type of the workers who took action? FTEs? Contract workers? If there are multiple employment types, they should be listed in order from most relevant to least relevant. It is also possible that there is no affiliated employment type, which can be the case for many public petitions. |
| union affiliation | Was a union affiliated? And if so, which one? |
| worker count | The number of workers active in the action. Since we're only looking at collective actions, the number must be more than 1. |
| employment_type | What was the employment type of the workers who took action? FTEs? Contract workers? If there are multiple employment types, they should be listed in order from most relevant to least relevant. It is also possible that there is no affiliated employment type, which can be the case for many public petitions. |
| union_affiliation | Was a union affiliated? And if so, which one? |
| worker_count | The number of workers active in the action. Since we're only looking at collective actions, the number must be more than 1. |
| struggle | The topic of struggle that caused the action. |
| source | The url of a reliable source that has reported on this event. |
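
Taken together, these rules imply a simple sanity check on each row. The helper below is a sketch, not part of the repo; the field names follow the table above, and the exact strictness (for example, how a blank company is treated) is an assumption.
```python
# Sketch of a row-level sanity check implied by the schema above (not in the repo).
from datetime import datetime

def looks_valid(row: dict) -> bool:
    try:
        # only the start date is recorded, in YYYY-MM-DD form
        datetime.strptime(str(row["date"])[:10], "%Y-%m-%d")
        # collective actions only, so more than one worker
        if int(row["worker_count"]) <= 1:
            return False
    except (KeyError, ValueError):
        return False
    # a reliable source URL is expected; company may legitimately be blank
    return bool(row.get("source"))

print(looks_valid({"date": "2017-07-24 00:00:00", "worker_count": "500",
                   "source": "http://unitehere.org/..."}))  # True
```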

@@ -95,12 +95,12 @@ For the data to be useful, we'll want to make sure we're collecting enough data
<td data-column="date">
2017-01-18 00:00:00
</td>
<td data-column="employment_type">
None
</td>
<td data-column="source">
https://techcrunch.com/2017/01/18/tech-employees-protest-in-front-of-palantir-hq-over-fears-it-will-build-trumps-muslim-registry/
</td>
<td data-column="employment_type">
None
</td>
<td data-column="struggle_type">
Ethics
</td>
21 changes: 13 additions & 8 deletions convert.py
@@ -3,14 +3,18 @@
import argparse
from pathlib import Path
from utils.markdown import (
get_df_from_md_document, clean_md_document, save_md_table_to_csv,
MD_PATH, PROJECT_NAME
get_df_from_md_document,
clean_md_document,
save_md_table_to_csv,
MD_PATH,
PROJECT_NAME,
)


def _get_parser():
parser = argparse.ArgumentParser(
description=textwrap.dedent(
"""
"""
This script is used to:
- clean up the passed in markdown file,
- export the table in the passed in markdown file to a csv
@@ -20,11 +24,11 @@ def _get_parser():
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
"--to-csv", action="store_true", help="Converts the table in the README to a csv file."
)
parser.add_argument(
"--output", help="Name of the output csv."
"--to-csv",
action="store_true",
help="Converts the table in the README to a csv file.",
)
parser.add_argument("--output", help="Name of the output csv.")
parser.add_argument(
"--clean-doc", action="store_true", help="Cleans up the table in the README."
)
@@ -33,7 +37,8 @@ def _get_parser():
parser.error("--to-csv requires --output.")
return args

if __name__ == '__main__':

if __name__ == "__main__":
args = _get_parser()
if args.clean_doc:
clean_md_document(Path(MD_PATH), PROJECT_NAME)
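
If you want to poke at convert.py's argument handling interactively, one hypothetical way (assuming the repo root is on `sys.path` so the `utils` import resolves) is to drive `_get_parser` with a fake `sys.argv`:
```python
# Hypothetical interactive check of convert.py's argument handling (not in the repo).
import sys

# _get_parser() parses sys.argv and enforces that --to-csv requires --output.
sys.argv = ["convert.py", "--to-csv", "--output", "actions.csv"]

from convert import _get_parser

args = _get_parser()
assert args.to_csv and args.output == "actions.csv"
```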
14 changes: 9 additions & 5 deletions utils/markdown.py
@@ -58,7 +58,9 @@ def _md_table_to_df(table: MarkdownTable) -> pd.DataFrame:
for i, tr in enumerate(trs):
action = {}
tds = tr.find_all("td")
if ('id' not in tr.attrs) or ('id' in tr.attrs and tr.attrs['id'] != HEADER_ROW_ID):
if ("id" not in tr.attrs) or (
"id" in tr.attrs and tr.attrs["id"] != HEADER_ROW_ID
):
for key, val in tr.attrs.items():
action[_serialize_meta_field(key)] = val
for td in tds:
@@ -77,7 +79,7 @@ def _df_to_md_table(df: pd.DataFrame, table_id: str) -> MarkdownTable:

# add row of headers
tr = soup.new_tag("tr")
tr['id'] = HEADER_ROW_ID
tr["id"] = HEADER_ROW_ID
soup.table.append(tr)
for col in cols:
if not col.startswith(META_FIELD_PATTERN):
@@ -159,11 +161,13 @@ def _clean_md_table(table: MarkdownTable) -> MarkdownTable:
return soup.prettify()


def _replace_md_table(doc: MarkdownDocument, table_id: str, table: MarkdownTable) -> MarkdownDocument:
def _replace_md_table(
doc: MarkdownDocument, table_id: str, table: MarkdownTable
) -> MarkdownDocument:
""" Replace the table in {doc} with {table}. """
assert _is_valid_table(table)
new_table = BeautifulSoup(table, 'html.parser')
soup = BeautifulSoup(doc, 'html.parser')
new_table = BeautifulSoup(table, "html.parser")
soup = BeautifulSoup(doc, "html.parser")
table = soup.find("table", id=table_id)
if not table:
raise TableNotFound
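
The find-a-table-by-id-and-replace-it pattern that `_replace_md_table` follows can be seen in isolation in the sketch below; the ids and HTML are made up for the example, and the repo raises its own `TableNotFound` where this sketch raises a generic error.
```python
# Stand-alone illustration of replacing a <table> by id with BeautifulSoup.
from bs4 import BeautifulSoup

doc = '<p>intro</p><table id="actions"><tr><td>old</td></tr></table>'
new_table = '<table id="actions"><tr><td>new</td></tr></table>'

soup = BeautifulSoup(doc, "html.parser")
target = soup.find("table", id="actions")
if target is None:
    raise ValueError("table not found")  # the repo raises TableNotFound here
target.replace_with(BeautifulSoup(new_table, "html.parser").table)
print(soup.prettify())
```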
