@@ -314,7 +314,7 @@ setMethod("checkpoint",
314314# '\dontrun{
315315# ' sc <- sparkR.init()
316316# ' rdd <- parallelize(sc, 1:10, 2L)
317- # ' numParititions (rdd) # 2L
317+ # ' numPartitions (rdd) # 2L
318318# '}
319319setGeneric ("numPartitions ", function(rdd) { standardGeneric("numPartitions") })
320320
@@ -1603,32 +1603,7 @@ setMethod("join",
16031603 rdd2Tagged <- lapply(rdd2 , function (x ) { list (x [[1 ]], list (2L , x [[2 ]])) })
16041604
16051605 doJoin <- function (v ) {
1606- t1 <- vector(" list" , length(v ))
1607- t2 <- vector(" list" , length(v ))
1608- index1 <- 1
1609- index2 <- 1
1610- for (x in v ) {
1611- if (x [[1 ]] == 1L ) {
1612- t1 [[index1 ]] <- x [[2 ]]
1613- index1 <- index1 + 1
1614- } else {
1615- t2 [[index2 ]] <- x [[2 ]]
1616- index2 <- index2 + 1
1617- }
1618- }
1619- length(t1 ) <- index1 - 1
1620- length(t2 ) <- index2 - 1
1621-
1622- result <- list ()
1623- length(result ) <- length(t1 ) * length(t2 )
1624- index <- 1
1625- for (i in t1 ) {
1626- for (j in t2 ) {
1627- result [[index ]] <- list (i , j )
1628- index <- index + 1
1629- }
1630- }
1631- result
1606+ joinTaggedList(v , list (FALSE , FALSE ))
16321607 }
16331608
16341609 joined <- flatMapValues(groupByKey(unionRDD(rdd1Tagged , rdd2Tagged ), numPartitions ), doJoin )
@@ -1668,37 +1643,7 @@ setMethod("leftOuterJoin",
16681643 rdd2Tagged <- lapply(rdd2 , function (x ) { list (x [[1 ]], list (2L , x [[2 ]])) })
16691644
16701645 doJoin <- function (v ) {
1671- t1 <- vector(" list" , length(v ))
1672- t2 <- vector(" list" , length(v ))
1673- index1 <- 1
1674- index2 <- 1
1675- for (x in v ) {
1676- if (x [[1 ]] == 1L ) {
1677- t1 [[index1 ]] <- x [[2 ]]
1678- index1 <- index1 + 1
1679- } else {
1680- t2 [[index2 ]] <- x [[2 ]]
1681- index2 <- index2 + 1
1682- }
1683- }
1684- length(t1 ) <- index1 - 1
1685- len2 <- index2 - 1
1686- if (len2 == 0 ) {
1687- t2 <- list (NULL )
1688- } else {
1689- length(t2 ) <- len2
1690- }
1691-
1692- result <- list ()
1693- length(result ) <- length(t1 ) * length(t2 )
1694- index <- 1
1695- for (i in t1 ) {
1696- for (j in t2 ) {
1697- result [[index ]] <- list (i , j )
1698- index <- index + 1
1699- }
1700- }
1701- result
1646+ joinTaggedList(v , list (FALSE , TRUE ))
17021647 }
17031648
17041649 joined <- flatMapValues(groupByKey(unionRDD(rdd1Tagged , rdd2Tagged ), numPartitions ), doJoin )
@@ -1738,37 +1683,7 @@ setMethod("rightOuterJoin",
17381683 rdd2Tagged <- lapply(rdd2 , function (x ) { list (x [[1 ]], list (2L , x [[2 ]])) })
17391684
17401685 doJoin <- function (v ) {
1741- t1 <- vector(" list" , length(v ))
1742- t2 <- vector(" list" , length(v ))
1743- index1 <- 1
1744- index2 <- 1
1745- for (x in v ) {
1746- if (x [[1 ]] == 1L ) {
1747- t1 [[index1 ]] <- x [[2 ]]
1748- index1 <- index1 + 1
1749- } else {
1750- t2 [[index2 ]] <- x [[2 ]]
1751- index2 <- index2 + 1
1752- }
1753- }
1754- len1 <- index1 - 1
1755- if (len1 == 0 ) {
1756- t1 <- list (NULL )
1757- } else {
1758- length(t1 ) <- len1
1759- }
1760- length(t2 ) <- index2 - 1
1761-
1762- result <- list ()
1763- length(result ) <- length(t1 ) * length(t2 )
1764- index <- 1
1765- for (i in t1 ) {
1766- for (j in t2 ) {
1767- result [[index ]] <- list (i , j )
1768- index <- index + 1
1769- }
1770- }
1771- result
1686+ joinTaggedList(v , list (TRUE , FALSE ))
17721687 }
17731688
17741689 joined <- flatMapValues(groupByKey(unionRDD(rdd1Tagged , rdd2Tagged ), numPartitions ), doJoin )
@@ -1798,59 +1713,24 @@ setMethod("rightOuterJoin",
17981713# ' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4)))
17991714# ' fullOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)),
18001715# ' # list(1, list(3, 1)),
1801- # ' # list(3, list(3, NULL)),
18021716# ' # list(2, list(NULL, 4)))
1717+ # ' # list(3, list(3, NULL)),
18031718# '}
18041719setGeneric ("fullOuterJoin ", function(rdd1, rdd2, numPartitions) { standardGeneric("fullOuterJoin") })
18051720
18061721# ' @rdname fullOuterJoin
18071722# ' @aliases fullOuterJoin,RDD,RDD-method
1723+
18081724setMethod ("fullOuterJoin ",
18091725 signature(rdd1 = " RDD" , rdd2 = " RDD" , numPartitions = " integer" ),
18101726 function (rdd1 , rdd2 , numPartitions ) {
18111727 rdd1Tagged <- lapply(rdd1 , function (x ) { list (x [[1 ]], list (1L , x [[2 ]])) })
18121728 rdd2Tagged <- lapply(rdd2 , function (x ) { list (x [[1 ]], list (2L , x [[2 ]])) })
18131729
18141730 doJoin <- function (v ) {
1815- t1 <- vector(" list" , length(v ))
1816- t2 <- vector(" list" , length(v ))
1817- index1 <- 1
1818- index2 <- 1
1819- for (x in v ) {
1820- if (x [[1 ]] == 1L ) {
1821- t1 [[index1 ]] <- x [[2 ]]
1822- index1 <- index1 + 1
1823- } else {
1824- t2 [[index2 ]] <- x [[2 ]]
1825- index2 <- index2 + 1
1826- }
1827- }
1828- len1 <- index1 - 1
1829- len2 <- index2 - 1
1830-
1831- if (len1 == 0 ) {
1832- t1 <- list (NULL )
1833- } else {
1834- length(t1 ) <- len1
1835- }
1836-
1837- if (len2 == 0 ) {
1838- t2 <- list (NULL )
1839- } else {
1840- length(t2 ) <- len2
1841- }
1842-
1843- result <- list ()
1844- length(result ) <- length(t1 ) * length(t2 )
1845- index <- 1
1846- for (i in t1 ) {
1847- for (j in t2 ) {
1848- result [[index ]] <- list (i , j )
1849- index <- index + 1
1850- }
1851- }
1852- result
1731+ joinTaggedList(v , list (TRUE , TRUE ))
18531732 }
1733+
18541734 joined <- flatMapValues(groupByKey(unionRDD(rdd1Tagged , rdd2Tagged ), numPartitions ), doJoin )
18551735 })
18561736
0 commit comments