-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathdata_preparation.R
33 lines (23 loc) · 961 Bytes
/
data_preparation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Prepares the monthly crude oil production data for analysis.
# The CSV was originally found on data.world; the corresponding EIA page is:
# http://www.eia.gov/dnav/pet/pet_crd_crpdn_adc_mbbl_m.htm
library(dplyr)
library(tidyr)
library(stringr)
library(zoo)
#' Load monthly crude oil production by U.S. state in long format.
#'
#' Reads the production CSV, drops the non-data rows, reshapes the
#' one-column-per-region table into one row per region and month, and keeps
#' only the rows whose region is one of the names in `state.name`.
#'
#' @param path Path to the CSV file. Defaults to the file name this function
#'   has always read from the working directory.
#' @return A data.frame with the columns `Location` (character state name),
#'   `Month` (factor with levels `month.abb`), `Year` (character),
#'   `ThousandBarrel` (numeric) and `Date` (Date, first day of the month).
getData <- function(
    path = "Monthly Crude Oil Production by State 1981 - Nov 2016.csv") {
  # stringsAsFactors = FALSE keeps the value columns as text on R < 4.0, so
  # as.numeric() below parses the figures instead of returning factor codes.
  raw <- read.csv(path, stringsAsFactors = FALSE)

  # Drop rows that are not observations. %in% never yields NA, so a missing
  # Date cannot inject an all-NA row the way a bare `!=` filter would.
  residue <- c("Back to Contents", "Sourcekey", "")
  raw <- raw[!is.na(raw$Date) & !(raw$Date %in% residue), , drop = FALSE]

  # Wide (one column per region) -> long (one row per region and month).
  df <- data.frame(gather(raw, Location, ThousandBarrel, -Date))
  df <- data.frame(separate(df, Date, c("Month", "Year"), "-"))
  df$Date <- as.Date(as.yearmon(paste(df$Month, df$Year)))

  # read.csv() rewrites spaces and punctuation in headers as dots. Strip the
  # series suffix, then turn the remaining dots back into spaces; otherwise
  # multi-word states ("North.Dakota") never match state.name and are
  # silently lost in the merge below.
  region <- sub(
    ".Field.Production.of.Crude.Oil..Thousand.Barrels.", "", df$Location
  )
  df$Location <- trimws(gsub(".", " ", region, fixed = TRUE))

  df$ThousandBarrel <- as.numeric(df$ThousandBarrel)

  # Inner join against the state names discards every non-state column
  # (national totals, regional aggregates, ...).
  states <- data.frame(
    Location = as.character(state.name),
    stringsAsFactors = FALSE
  )
  df <- merge(df, states, by = "Location")

  # Calendar-ordered levels so months sort Jan..Dec rather than A..Z.
  df$Month <- factor(df$Month, levels = month.abb)
  df
}