Skip to content
This repository was archived by the owner on Nov 30, 2024. It is now read-only.

Commit 286a566

Browse files
committed
Guard against repository being blank
Fixes #85
1 parent 696363e commit 286a566

File tree

6 files changed

+119
-83
lines changed

6 files changed

+119
-83
lines changed

package-lock.json

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+3
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44
"description": "A clearinghouse for tweet datasets",
55
"version": "0.0.2",
66
"author": "Documenting the Now <info@docnow.io>",
7+
"engines": {
8+
"node": "12"
9+
},
710
"dependencies": {
811
"@material-ui/core": "^4.11.2",
912
"@material-ui/styles": "^4.11.2",

src/components/datasets.js

+3-3
Original file line number | Diff line number | Diff line change
@@ -257,13 +257,13 @@ function filterSearch(datasets, search) {
257257
const pattern = new RegExp(search, 'i')
258258
const slugs = []
259259
for (const d of datasets) {
260-
if (d.title.match(pattern)) {
260+
if (d.title && d.title.match(pattern)) {
261261
slugs.push(d.slug)
262-
} else if (d.description.match(pattern)) {
262+
} else if (d.description && d.description.match(pattern)) {
263263
slugs.push(d.slug)
264264
} else if (d.creators.map(c => c.name).join(' ').match(pattern)) {
265265
slugs.push(d.slug)
266-
} else if (d.repository.match(pattern)) {
266+
} else if (d.repository && d.repository.match(pattern)) {
267267
slugs.push(d.slug)
268268
} else if (d.subjects.join(' ').match(pattern)) {
269269
slugs.push(d.slug)

src/datasets/aspw-twitter-dataset-2021-11-30.md

+2-2
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ dates:
77
- end: '2021-11-27'
88
start: '2020-11-12'
99
published: 2021-11-30
10-
repository:
10+
repository: GitHub
1111
subjects:
1212
- coronavirus
1313
- pandemia
@@ -18,7 +18,7 @@ subjects:
1818
- church
1919
- border crisis
2020
- vaccinations
21-
title: the Social Archive of the Polish Web
21+
title: The Social Archive of the Polish Web
2222
tweets: 4617353
2323
url: https://github.com/mw0000/aspw-twitter-dataset-2021-11-30
2424
---

static/data/datasets.json

+105-72
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,37 @@
11
[
2+
{
3+
"title": "The Social Archive of the Polish Web",
4+
"creators": [
5+
{
6+
"name": "Marcin Wilkowski",
7+
"email": "aspw[at]wilkowski.org"
8+
}
9+
],
10+
"added": "2021-11-30T23:42:14.000Z",
11+
"published": "2021-11-30T00:00:00.000Z",
12+
"dates": [
13+
{
14+
"start": "2020-11-12",
15+
"end": "2021-11-27"
16+
}
17+
],
18+
"repository": "GitHub",
19+
"subjects": [
20+
"coronavirus",
21+
"pandemia",
22+
"politics",
23+
"media",
24+
"cities",
25+
"LGBT",
26+
"church",
27+
"border crisis",
28+
"vaccinations"
29+
],
30+
"tweets": 4617353,
31+
"url": "https://github.com/mw0000/aspw-twitter-dataset-2021-11-30",
32+
"slug": "aspw-twitter-dataset-2021-11-30",
33+
"description": "<p>4617353 tweets IDs (4398351 unique) in Polish language covering topics like: coronavirus pandemia, politics, media, cities, LGBT, church, border crisis, vaccinations. For details, see meta.csv in every directory. All this data together with the URLs of web pages linked within that tweets can be accessed in <a href=\"https://github.com/mw0000/aspw-public-archive\">https://github.com/mw0000/aspw-public-archive</a> or <a href=\"https://aspw.pl/pakiety\">https://aspw.pl/pakiety</a>.</p>"
34+
},
235
{
336
"title": "#retweetthe8th: 2018 Referendum to repeal the 8th Amendment of the Constitution of Ireland",
437
"creators": [
@@ -2811,7 +2844,7 @@
28112844
],
28122845
"tweets": 5655632,
28132846
"url": "http://dx.doi.org/10.7910/DVN/TQBLWZ",
2814-
"slug": "20170907-end-of-term-2016-us-government-twitter-archive",
2847+
"slug": "20170907-end-of-term-2016-u-s-government-twitter-archive",
28152848
"description": "<p>This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.</p>"
28162849
},
28172850
{
@@ -2845,7 +2878,7 @@
28452878
],
28462879
"tweets": 5655632,
28472880
"url": "http://dx.doi.org/10.7910/DVN/TQBLWZ",
2848-
"slug": "20170907-end-of-term-2016-u-s-government-twitter-archive",
2881+
"slug": "20170907-end-of-term-2016-us-government-twitter-archive",
28492882
"description": "<p>This dataset contains the tweet ids of 5,655,632 tweets that were collected from approximately 3000 Twitter accounts affiliated with the U.S. government. They were collected between October 21, 2016 and January 21, 2017 from the Twitter API using Social Feed Manager. This dataset was created as part of the End of Term Web Archiving initiative. The lists of accounts came from the U.S. Digital Registry and by public submissions.</p>"
28502883
},
28512884
{
@@ -3312,13 +3345,13 @@
33123345
],
33133346
"repository": "Harvard Dataverse",
33143347
"subjects": [
3315-
"Womensmarch",
3348+
"Women",
33163349
"Activism",
33173350
"Politics"
33183351
],
33193352
"tweets": 7275228,
33203353
"url": "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/5ZVMOR",
3321-
"slug": "20170203-womens-march-tweet-ids",
3354+
"slug": "20170203-women-s-march-tweet-ids",
33223355
"description": "<p>This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.</p>"
33233356
},
33243357
{
@@ -3343,13 +3376,13 @@
33433376
],
33443377
"repository": "Harvard Dataverse",
33453378
"subjects": [
3346-
"Women",
3379+
"Womensmarch",
33473380
"Activism",
33483381
"Politics"
33493382
],
33503383
"tweets": 7275228,
33513384
"url": "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/5ZVMOR",
3352-
"slug": "20170203-women-s-march-tweet-ids",
3385+
"slug": "20170203-womens-march-tweet-ids",
33533386
"description": "<p>This dataset contains the tweet ids of 7,275,228 tweets related to the Women's March on January 21, 2017. They were collected between December 19, 2016 and January 23, 2017 from the Twitter API using Social Feed Manager. See included README.txt for additional information.</p>"
33543387
},
33553388
{
@@ -3645,31 +3678,6 @@
36453678
"slug": "20161230-the-fall-of-aleppo-tweets-aleppo-2016-12-13-through-2016-12-29",
36463679
"description": "<p>8,595,589 tweet ids for aleppo tweets captured during the fall of Aleppo in December 2016. Tweets can be \"rehydrated\" with Documenting the Now's twarc (<a href=\"https://github.com/DocNow/twarc\">https://github.com/DocNow/twarc</a>). twarc.py --hydrate aleppo<em>tweet</em>ids.txt > aleppo.json</p>"
36473680
},
3648-
{
3649-
"title": "#elxn42 tweets (42nd Canadian Federal Election)",
3650-
"creators": [
3651-
{
3652-
"name": "Nick Ruest",
3653-
"email": "ruestn@yorku.ca"
3654-
}
3655-
],
3656-
"added": "2016-12-24T15:14:07.000Z",
3657-
"published": "2015-11-19T00:00:00.000Z",
3658-
"dates": [
3659-
{
3660-
"start": "2015-07-25",
3661-
"end": "2015-11-05"
3662-
}
3663-
],
3664-
"repository": "Scholars Portal Dataverse",
3665-
"subjects": [
3666-
"Politics"
3667-
],
3668-
"tweets": 3039804,
3669-
"url": "http://hdl.handle.net/10864/11270",
3670-
"slug": "20161224-elxn42-tweets-42nd-canadian-federal-election",
3671-
"description": "<p>Tweet ids for #elxn42 tweets.</p>"
3672-
},
36733681
{
36743682
"title": "Ferguson Tweets",
36753683
"creators": [
@@ -3700,6 +3708,31 @@
37003708
"slug": "20161224-ferguson-tweets",
37013709
"description": "<p>This item represents a collection of 13,480,000 tweet IDs that mentioned 'ferguson' from 2014-08-10 to 2014-08-27 and 15,080,078 tweet IDs that mention \"ferguson\" between 2014-11-11 and 2014-12-08.\nThe first set includes tweets for the two week period after the shooting of Michael Brown, and the second range includes tweets around the grand jury's decision not to indict police office Darren Wilson which was announced on 2014-11-24.\nThe first set of tweets were collected by Ed Summers at the University of Maryland and the second was a collaboration between Molly Loyd, Gregory Coleman, Kimberly Lamke, Benjamin Sugar and Ed Summers.</p>"
37023710
},
3711+
{
3712+
"title": "#elxn42 tweets (42nd Canadian Federal Election)",
3713+
"creators": [
3714+
{
3715+
"name": "Nick Ruest",
3716+
"email": "ruestn@yorku.ca"
3717+
}
3718+
],
3719+
"added": "2016-12-24T15:14:07.000Z",
3720+
"published": "2015-11-19T00:00:00.000Z",
3721+
"dates": [
3722+
{
3723+
"start": "2015-07-25",
3724+
"end": "2015-11-05"
3725+
}
3726+
],
3727+
"repository": "Scholars Portal Dataverse",
3728+
"subjects": [
3729+
"Politics"
3730+
],
3731+
"tweets": 3039804,
3732+
"url": "http://hdl.handle.net/10864/11270",
3733+
"slug": "20161224-elxn42-tweets-42nd-canadian-federal-election",
3734+
"description": "<p>Tweet ids for #elxn42 tweets.</p>"
3735+
},
37033736
{
37043737
"title": "Yes All Women Twitter Dataset",
37053738
"creators": [
@@ -3802,54 +3835,54 @@
38023835
"description": "<p>Tweet ids for #panamapapers tweets.</p>"
38033836
},
38043837
{
3805-
"title": "#thechalkening tweets",
3838+
"title": "#paris #Bataclan #parisattacks #porteouverte tweets",
38063839
"creators": [
38073840
{
38083841
"name": "Nick Ruest",
38093842
"email": "ruestn@yorku.ca"
38103843
}
38113844
],
38123845
"added": "2016-12-23T22:40:17.000Z",
3813-
"published": "2016-04-13T00:00:00.000Z",
3846+
"published": "2015-12-12T00:00:00.000Z",
38143847
"dates": [
38153848
{
3816-
"start": "2016-04-03",
3817-
"end": "2016-06-06"
3849+
"start": "2015-11-04",
3850+
"end": "2015-12-08"
38183851
}
38193852
],
38203853
"repository": "Scholars Portal Dataverse",
38213854
"subjects": [
38223855
"Politics"
38233856
],
3824-
"tweets": 115524,
3825-
"url": "http://hdl.handle.net/10864/11591",
3826-
"slug": "20161223-thechalkening-tweets",
3827-
"description": "<p>Tweet ids for #thechalkening tweets.</p>"
3857+
"tweets": 14939154,
3858+
"url": "http://hdl.handle.net/10864/11312",
3859+
"slug": "20161223-paris-bataclan-parisattacks-porteouverte-tweets",
3860+
"description": "<p>Tweet ids for #paris #Bataclan #parisattacks #porteouverte tweets.</p>"
38283861
},
38293862
{
3830-
"title": "#paris #Bataclan #parisattacks #porteouverte tweets",
3863+
"title": "#thechalkening tweets",
38313864
"creators": [
38323865
{
38333866
"name": "Nick Ruest",
38343867
"email": "ruestn@yorku.ca"
38353868
}
38363869
],
38373870
"added": "2016-12-23T22:40:17.000Z",
3838-
"published": "2015-12-12T00:00:00.000Z",
3871+
"published": "2016-04-13T00:00:00.000Z",
38393872
"dates": [
38403873
{
3841-
"start": "2015-11-04",
3842-
"end": "2015-12-08"
3874+
"start": "2016-04-03",
3875+
"end": "2016-06-06"
38433876
}
38443877
],
38453878
"repository": "Scholars Portal Dataverse",
38463879
"subjects": [
38473880
"Politics"
38483881
],
3849-
"tweets": 14939154,
3850-
"url": "http://hdl.handle.net/10864/11312",
3851-
"slug": "20161223-paris-bataclan-parisattacks-porteouverte-tweets",
3852-
"description": "<p>Tweet ids for #paris #Bataclan #parisattacks #porteouverte tweets.</p>"
3882+
"tweets": 115524,
3883+
"url": "http://hdl.handle.net/10864/11591",
3884+
"slug": "20161223-thechalkening-tweets",
3885+
"description": "<p>Tweet ids for #thechalkening tweets.</p>"
38533886
},
38543887
{
38553888
"title": "#YMMfire tweets",
@@ -3876,31 +3909,6 @@
38763909
"slug": "20161223-ymmfire-tweets",
38773910
"description": "<p>Tweet ids for #YMMfire tweets captured during the 2016 Fort McMurray Wildfire from 2016-05-01 to 2016-06-25.</p>"
38783911
},
3879-
{
3880-
"title": "Election 2012 Tweet ID dataset",
3881-
"creators": [
3882-
{
3883-
"name": "Microsoft",
3884-
"email": null
3885-
}
3886-
],
3887-
"added": "2016-12-23T22:03:14.000Z",
3888-
"published": "2016-05-12T00:00:00.000Z",
3889-
"dates": [
3890-
{
3891-
"start": "2012-07-01",
3892-
"end": "2012-11-07"
3893-
}
3894-
],
3895-
"repository": "Microsoft",
3896-
"subjects": [
3897-
"Politics"
3898-
],
3899-
"tweets": 38000000,
3900-
"url": "https://www.microsoft.com/en-us/download/details.aspx?id=52598",
3901-
"slug": "20161223-election-2012-tweet-id-dataset",
3902-
"description": "<p>This data set identifies 38M tweets collected for the analysis of social media messages related to the 2012 U.S. Presidential election. The data set provides tweet IDs for tweets containing the words \"obama\", \"romney\", or both (case-insensitive matching) during the period from July 1, 2012 through November 7, 2012. The paper, “Online and Social Media Data As an Imperfect Continuous Panel Survey.” PLoS ONE 11(1): e0145406 by Diaz et al. provides further description of the dataset.</p>"
3903-
},
39043912
{
39053913
"title": "2016 United States Presidential Election Tweet Ids",
39063914
"creators": [
@@ -3934,6 +3942,31 @@
39343942
"slug": "20161223-2016-united-states-presidential-election-tweet-ids",
39353943
"description": "<p>This dataset contains the tweet ids of approximately 280 million tweets related to the 2016 United States presidential election. They were collected between July 13, 2016 and November 10, 2016 from the Twitter API using Social Feed Manager. These tweet ids are broken up into 12 collections. Each collection was collected either from the GET statuses/user_timeline method of the Twitter REST API or the POST statuses/filter method of the Twitter Stream API.</p>"
39363944
},
3945+
{
3946+
"title": "Election 2012 Tweet ID dataset",
3947+
"creators": [
3948+
{
3949+
"name": "Microsoft",
3950+
"email": null
3951+
}
3952+
],
3953+
"added": "2016-12-23T22:03:14.000Z",
3954+
"published": "2016-05-12T00:00:00.000Z",
3955+
"dates": [
3956+
{
3957+
"start": "2012-07-01",
3958+
"end": "2012-11-07"
3959+
}
3960+
],
3961+
"repository": "Microsoft",
3962+
"subjects": [
3963+
"Politics"
3964+
],
3965+
"tweets": 38000000,
3966+
"url": "https://www.microsoft.com/en-us/download/details.aspx?id=52598",
3967+
"slug": "20161223-election-2012-tweet-id-dataset",
3968+
"description": "<p>This data set identifies 38M tweets collected for the analysis of social media messages related to the 2012 U.S. Presidential election. The data set provides tweet IDs for tweets containing the words \"obama\", \"romney\", or both (case-insensitive matching) during the period from July 1, 2012 through November 7, 2012. The paper, “Online and Social Media Data As an Imperfect Continuous Panel Survey.” PLoS ONE 11(1): e0145406 by Diaz et al. provides further description of the dataset.</p>"
3969+
},
39373970
{
39383971
"title": "#JeSuisCharlie, #JeSuisAhmed, #JeSuisJuif, #CharlieHebdo tweets",
39393972
"creators": [

0 commit comments

Comments
 (0)