yum -y install R
cd /etc/default
sudo ln -s /etc/default/hadoop /etc/profile.d/hadoop.sh
cat /etc/profile.d/hadoop.sh | sed 's/export //g' > ~/.Renviron
wget http://download2.rstudio.org/rstudio-server-0.97.332-x86_64.rpm
sudo yum install --nogpgcheck rstudio-server-0.97.332-x86_64.rpm
$ sudo R CMD INSTALL Rcpp_0.10.3.tar.gz
$ sudo R CMD INSTALL digest_0.6.3.tar.gz
$ sudo R CMD INSTALL plyr_1.8.tar.gz
$ sudo R CMD INSTALL reshape2_1.2.2.tar.gz
$ sudo R CMD INSTALL rhdfs_1.0.5.tar.gz
$ sudo R CMD INSTALL rJava_0.9-4.tar.gz
$ sudo R CMD INSTALL RJSONIO_1.0-2.tar.gz
$ sudo R CMD INSTALL stringr_0.6.2.tar.gz
$ sudo R
> install.packages("functional")
/usr/lib/hadoop/contrib/streaming/hadoop-streaming-1.2.0.1.3.0.0-107.jar
Set some environment variables in .bashrc:
export HADOOP_HOME=/usr/lib/hadoop
export HADOOP_CMD=/usr/bin/hadoop
export HADOOP_STREAMING=/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.2.0.jar
It's done.
Note:
To set environment variables from within R:
>Sys.setenv(HADOOP_HOME="/usr/lib/hadoop")
>Sys.setenv(HADOOP_CMD="/usr/bin/hadoop")
To view the path that an environment variable is set to:
>Sys.getenv("HADOOP_HOME")
To test whether the R and Hadoop integration is working:
>library(rmr2)
>library(rhdfs)
>hdfs.init()
>ints = to.dfs(1:100)
>calc = mapreduce(input = ints,map = function(k, v) cbind(v, 2*v))
from.dfs(calc)
$val
v
[1,] 1 2
[2,] 2 4
[3,] 3 6
[4,] 4 8
[5,] 5 10
.....
If we want to run HDFS filesystem commands from R, we first need to initialize rhdfs using the hdfs.init() function; then we can run the well-known ls, rm, mkdir, stat, etc. commands:
> hdfs.init()
> hdfs.ls("/tmp")
permission owner group size modtime file
1 drwxr-xr-x istvan supergroup 0 2013-02-25 21:59 /tmp/RtmpC94L4R
2 drwxr-xr-x istvan supergroup 0 2013-02-25 21:49 /tmp/hadoop-istvan
> hdfs.stat("/tmp")
perms isDir block replication owner group size modtime path
1 rwxr-xr-x TRUE 0 0 istvan supergroup 0 45124-08-29
------------- Additional steps if it is not working -------------------
yum -y --enablerepo=epel install R R-devel
R CMD javareconf
Start the R REPL and install some packages:
install.packages(c('Rcpp', 'RJSONIO', 'itertools', 'digest'), repos="http://cran.revolutionanalytics.com", INSTALL_opts=c('--byte-compile') )
install.packages(c('functional', 'stringr', 'plyr'), repos="http://cran.revolutionanalytics.com", INSTALL_opts=c('--byte-compile') )
install.packages(c('rJava'), repos="http://cran.revolutionanalytics.com" )
install.packages(c('randomForest'), repos="http://cran.revolutionanalytics.com" )
install.packages(c('reshape2'), repos="http://cran.revolutionanalytics.com" )
Then download RHadoop (rmr2) and install it:
git clone git://github.com/RevolutionAnalytics/rmr2.git
R CMD INSTALL --byte-compile rmr2/pkg/
Set some environment variables in .bashrc:
export HADOOP_HOME=/usr/lib/hadoop
export HADOOP_CMD=/usr/bin/hadoop
export HADOOP_STREAMING=/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.2.0.jar
Make sure to source the new variables before continuing.
Install rhdfs:
git clone git://github.com/RevolutionAnalytics/rhdfs.git
R CMD INSTALL --byte-compile rhdfs/pkg/
cd /etc/default
sudo ln -s /etc/default/hadoop /etc/profile.d/hadoop.sh
cat /etc/profile.d/hadoop.sh | sed 's/export //g' > ~/.Renviron
wget http://download2.rstudio.org/rstudio-server-0.97.332-x86_64.rpm
sudo yum install --nogpgcheck rstudio-server-0.97.332-x86_64.rpm
$ sudo R CMD INSTALL Rcpp_0.10.3.tar.gz
$ sudo R CMD INSTALL digest_0.6.3.tar.gz
$ sudo R CMD INSTALL plyr_1.8.tar.gz
$ sudo R CMD INSTALL reshape2_1.2.2.tar.gz
$ sudo R CMD INSTALL rhdfs_1.0.5.tar.gz
$ sudo R CMD INSTALL rJava_0.9-4.tar.gz
$ sudo R CMD INSTALL RJSONIO_1.0-2.tar.gz
$ sudo R CMD INSTALL stringr_0.6.2.tar.gz
$ sudo R
> install.packages("functional")
/usr/lib/hadoop/contrib/streaming/hadoop-streaming-1.2.0.1.3.0.0-107.jar
Set some environment variables in .bashrc:
export HADOOP_HOME=/usr/lib/hadoop
export HADOOP_CMD=/usr/bin/hadoop
export HADOOP_STREAMING=/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.2.0.jar
It's done.
Note:
To set environment variables from within R:
>Sys.setenv(HADOOP_HOME="/usr/lib/hadoop")
>Sys.setenv(HADOOP_CMD="/usr/bin/hadoop")
To view the path that an environment variable is set to:
>Sys.getenv("HADOOP_HOME")
To test whether the R and Hadoop integration is working:
>library(rmr2)
>library(rhdfs)
>hdfs.init()
>ints = to.dfs(1:100)
>calc = mapreduce(input = ints,map = function(k, v) cbind(v, 2*v))
from.dfs(calc)
$val
v
[1,] 1 2
[2,] 2 4
[3,] 3 6
[4,] 4 8
[5,] 5 10
.....
If we want to run HDFS filesystem commands from R, we first need to initialize rhdfs using the hdfs.init() function; then we can run the well-known ls, rm, mkdir, stat, etc. commands:
> hdfs.init()
> hdfs.ls("/tmp")
permission owner group size modtime file
1 drwxr-xr-x istvan supergroup 0 2013-02-25 21:59 /tmp/RtmpC94L4R
2 drwxr-xr-x istvan supergroup 0 2013-02-25 21:49 /tmp/hadoop-istvan
> hdfs.stat("/tmp")
perms isDir block replication owner group size modtime path
1 rwxr-xr-x TRUE 0 0 istvan supergroup 0 45124-08-29
------------- Additional steps if it is not working -------------------
yum -y --enablerepo=epel install R R-devel
R CMD javareconf
Start the R REPL and install some packages:
install.packages(c('Rcpp', 'RJSONIO', 'itertools', 'digest'), repos="http://cran.revolutionanalytics.com", INSTALL_opts=c('--byte-compile') )
install.packages(c('functional', 'stringr', 'plyr'), repos="http://cran.revolutionanalytics.com", INSTALL_opts=c('--byte-compile') )
install.packages(c('rJava'), repos="http://cran.revolutionanalytics.com" )
install.packages(c('randomForest'), repos="http://cran.revolutionanalytics.com" )
install.packages(c('reshape2'), repos="http://cran.revolutionanalytics.com" )
Then download RHadoop (rmr2) and install it:
git clone git://github.com/RevolutionAnalytics/rmr2.git
R CMD INSTALL --byte-compile rmr2/pkg/
Set some environment variables in .bashrc:
export HADOOP_HOME=/usr/lib/hadoop
export HADOOP_CMD=/usr/bin/hadoop
export HADOOP_STREAMING=/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-2.0.0-mr1-cdh4.2.0.jar
Make sure to source the new variables before continuing.
Install rhdfs:
git clone git://github.com/RevolutionAnalytics/rhdfs.git
R CMD INSTALL --byte-compile rhdfs/pkg/
No comments:
Post a Comment