-
Notifications
You must be signed in to change notification settings - Fork 278
/
Copy pathdataPreparation.R
53 lines (42 loc) · 2.51 KB
/
dataPreparation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
####################################################################################################
##dataPrepration.R: this script fills missing values in the data
####################################################################################################
dataPreparation = function(sqlSettings,outputTable,region,startTime,endTime)
{
sqlConnString = sqlSettings$connString
#drop table if already exists
if (rxSqlServerTableExists(inputTable,connectionString=sqlConnString)) {rxSqlServerDropTable(inputTable,connectionString=sqlConnString)}
#join demand table and temperature table and save results to table edfData***
dataQuery = paste("select a.utcTimestamp as utcTimestamp, a.region as region, a.Load as Load, b.temperature as temperature from (select * from demandSample where region=",region," and utcTimeStamp>='",startTime,"' and utcTimeStamp<='",endTime,"') as a join(select utcTimestamp, region, temperature from temperatureSample where region = ",region," and utcTimestamp >='",startTime,"' and utcTimestamp <= '",endTime,"') as b on a.utcTimestamp = b.utcTimestamp order by a.utcTimestamp",sep="")
#create sql server data sources
inputDataSQL = RxSqlServerData(sqlQuery = dataQuery, connectionString = sqlConnString)
outputDataSQL = RxSqlServerData(table = outputTable,connectionString = sqlConnString)
#fill NA values in the data
rxExec(fillNA,inData = inputDataSQL, outData = outputDataSQL)
}
fillNA = function (inData,outData)
{
#Convert input data into data frame
data=rxImport(inData)
#Create full time series by filling in missing timestamps
data$utcTimestamp = as.POSIXlt(data$utcTimestamp,tz="GMT", format="%Y-%m-%d %H:%M:%S")
minTime=min(data$utcTimestamp)
maxTime=max(data$utcTimestamp)
resolution = difftime("2015-11-01 05:00:00 UTC", "2015-11-01 04:00:00 UTC")
fullTime = seq(from=minTime, to=maxTime, by=resolution)
fullTimedf = data.frame(utcTimestamp = fullTime)
fullTimedf$utcTimestamp=as.character(fullTimedf$utcTimestamp)
data$utcTimestamp=as.character(data$utcTimestamp)
newdata = merge(fullTimedf, data, by.x = 'utcTimestamp',by.y = 'utcTimestamp', all=TRUE)
# fill in missing value based on previous day same hour's Load
for (i in 25:nrow(newdata)){
if (is.na(newdata$Load[i]))
{newdata$Load[i] = newdata$Load[i-24]}
}
for (i in 25:nrow(newdata)){
if (is.na(newdata$temperature[i]))
{newdata$temperature[i] = newdata$temperature[i-24]}
}
#upload results to SQL table
rxDataStep(inData = newdata, outFile = outData, overwrite = TRUE)
}