Step 2. Add Hadoop user and group
Add a new user 'hduser' in group 'hadoop'
# sudo addgroup hadoop
# sudo adduser --ingroup hadoop hduser
Enter password: hadoop123
Enter the personal information when prompted
Add hduser to sudo group to have all rights
# sudo adduser hduser sudo
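To verify the account (an optional sanity check), list hduser's groups:
# id hduser
The output should include both the hadoop and sudo groups; the numeric uid/gid values will vary.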
Step 5. Download Hadoop
Download the Hadoop tarball hadoop-2.7.1.tar.gz into your home directory (example: /home/shahnawaz/) from http://www.us.apache.org/dist/hadoop/common/hadoop-2.7.1/
OR
Directly, using the command:
# wget http://www.us.apache.org/dist/hadoop/common/hadoop-2.7.1/hadoop-2.7.1.tar.gz
Step 6. Extract Hadoop tarball
# cd /home/shahnawaz
# sudo mv hadoop-2.7.1.tar.gz /usr/local/
# cd /usr/local/
# tar -xvf hadoop-2.7.1.tar.gz
# sudo mv hadoop-2.7.1 hadoop
# sudo chown -R hduser:hadoop hadoop
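As an optional check, confirm that the ownership change was applied to the extracted tree:
# ls -ld /usr/local/hadoop
The directory should be listed as owned by hduser:hadoop.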
Step 7. Test that Hadoop works
Run the hadoop script from the install directory; with no arguments it prints a usage message, which confirms the extraction worked
# cd /usr/local/hadoop
# bin/hadoop
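A second quick check is to ask for the version; this tarball should report 2.7.1 (the build details printed below it will vary):
# bin/hadoop version
Hadoop 2.7.1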
Step 8. Prepare Hadoop
Edit hduser's .bashrc file
$ gedit ~/.bashrc
Copy the following lines at the end
# Set Hadoop-related environment variables
export HADOOP_HOME=/usr/local/hadoop
export PIG_HOME=/usr/local/pig
export PIG_CLASSPATH=/usr/local/hadoop/etc/hadoop

# Set JAVA_HOME (we will also configure JAVA_HOME directly for Hadoop later on)
export JAVA_HOME=/usr/lib/jvm/java-8-oracle

# Some convenient aliases and functions for running Hadoop-related commands
unalias fs &> /dev/null
alias fs="hadoop fs"
unalias hls &> /dev/null
alias hls="fs -ls"

# If you have LZO compression enabled in your Hadoop cluster and
# compress job outputs with LZOP (not covered in this tutorial):
# conveniently inspect an LZOP compressed file from the command
# line; run via:
#
# $ lzohead /hdfs/path/to/lzop/compressed/file.lzo
#
# Requires installed 'lzop' command.
lzohead () {
    hadoop fs -cat "$1" | lzop -dc | head -1000 | less
}

# Add Hadoop bin/ directory to PATH
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$PIG_HOME/bin
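After saving the file, reload it as hduser so the new variables take effect in the current shell:
$ source ~/.bashrc
$ echo $HADOOP_HOME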
Step 9. Configure Java path in hadoop-env.sh
$ sudo gedit /usr/local/hadoop/etc/hadoop/hadoop-env.sh
Update the JAVA_HOME path
export JAVA_HOME=/usr/lib/jvm/java-8-oracle
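To confirm the change was saved (optional), grep for the line:
$ grep JAVA_HOME /usr/local/hadoop/etc/hadoop/hadoop-env.sh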
Step 10. Create some directories using root
# sudo mkdir -p /app/hadoop/tmp
# sudo chown -R hduser:hadoop /app/hadoop/tmp/
# sudo chmod -R 750 /app/hadoop/tmp/
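The ownership and mode can be verified with ls; with mode 750 the listing should look like this (size and date will vary):
# ls -ld /app/hadoop/tmp
drwxr-x--- 2 hduser hadoop 4096 ... /app/hadoop/tmp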
Step 11. Edit configuration in core-site.xml
# sudo gedit /usr/local/hadoop/etc/hadoop/core-site.xml
Update the configuration under the <configuration> tag
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/app/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:54310</value>
  </property>
</configuration>
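To read the value back (an optional check), hdfs getconf can be used with the full path to the binary:
$ /usr/local/hadoop/bin/hdfs getconf -confKey fs.defaultFS
hdfs://localhost:54310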
Step 12. Edit configuration in hdfs-site.xml
# sudo gedit /usr/local/hadoop/etc/hadoop/hdfs-site.xml
Update the configuration under the <configuration> tag
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
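A replication factor of 1 fits a single-node setup, since there is only one DataNode to hold each block. The value can be read back the same way:
$ /usr/local/hadoop/bin/hdfs getconf -confKey dfs.replication
1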
Step 13. Test SSH
$ su - hduser
$ ssh localhost
If the connection succeeds, type 'exit' to log out of the SSH shell
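If the connection is refused or keeps asking for a password, hduser may still need a passwordless key pair; a minimal setup with the standard OpenSSH commands (assuming the SSH server is already installed) is:
$ ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
$ chmod 600 ~/.ssh/authorized_keys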
Step 14. Set some generic Hadoop paths
Note: HADOOP_HOME must point at the install root, not the bin/ directory; bin/ is appended to PATH instead.
# export HADOOP_DIR=/usr/local/hadoop
# echo $HADOOP_DIR
# export HADOOP_HOME=/usr/local/hadoop
# echo $HADOOP_HOME
# export PATH=$PATH:$HADOOP_HOME/bin
# echo $PATH
# which hadoop
Step 15. Format HDFS (using hduser)
$ $HADOOP_DIR/bin/hdfs namenode -format
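A successful format ends with a log line similar to the following (exact wording varies by release; the path comes from hadoop.tmp.dir set in Step 11):
INFO common.Storage: Storage directory /app/hadoop/tmp/dfs/name has been successfully formatted.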
Step 16. Start and check HDFS (using hduser)
$ cd $HADOOP_DIR/sbin
$ ./start-dfs.sh
See all running Java processes (including the HDFS daemons)
$ jps
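On a healthy single-node start the listing should include the three HDFS daemons, along these lines (process IDs will differ):
2287 NameNode
2422 DataNode
2598 SecondaryNameNode
2731 Jps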
Check the connection to HDFS; it should succeed
$ telnet localhost 54310
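If telnet connects, HDFS is listening on the NameNode port. The NameNode web interface should also be reachable in a browser at the Hadoop 2.x default port:
http://localhost:50070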