============================== A way to install Giraph/Hadoop =============================== Prepare a (virtual) machine with Ubuntu 16.04.2 LTS installed. Then, follow the instructions below (type or copy and paste the commands into bash). * Installing OpenSSH server sudo apt-get install openssh-server * Installing public keys for SSH ssh-keygen # keep pressing Enter to accept the defaults cat ~/.ssh/id_rsa.pub > ~/.ssh/authorized_keys * Installing Java 8 (Oracle) sudo add-apt-repository ppa:webupd8team/java sudo apt-get update sudo apt-get install oracle-java8-installer * Installing Hadoop 1.2.1 (pseudo-distributed environment on localhost only) cd wget http://ftp.riken.jp/net/apache/hadoop/common/hadoop-1.2.1/hadoop-1.2.1.tar.gz tar xvzf hadoop-1.2.1.tar.gz cd hadoop-1.2.1 cat << EOF > conf/core-site.xml hadoop.tmp.dir $HOME/hdfs16/ fs.default.name hdfs://localhost:54310 EOF cat << EOF > conf/hdfs-site.xml dfs.replication 1 EOF cat << EOF > conf/mapred-site.xml mapred.child.java.opts -Xmx8000m mapred.job.tracker localhost:54311 mapred.tasktracker.map.tasks.maximum 4 mapred.map.tasks 4 mapreduce.job.counters.limit 5000000 EOF sed -i -e 's|.*JAVA_HOME=.*|export JAVA_HOME=/usr/lib/jvm/java-8-oracle/|' conf/hadoop-env.sh bin/hadoop namenode -format wget http://www.gutenberg.org/cache/epub/132/pg132.txt bin/start-all.sh bin/hadoop dfs -copyFromLocal pg132.txt /user/hduser/input/pg132.txt bin/hadoop dfs -ls /user/hduser/input bin/hadoop dfs -cat /user/hduser/input/pg132.txt bin/hadoop jar hadoop-examples-1.2.1.jar wordcount /user/hduser/input/pg132.txt /user/hduser/output/wordcount bin/hadoop dfs -cat /user/hduser/output/wordcount/p* | less # If you see a table of word counts, Hadoop is working correctly. 
* Installing Giraph 1.2.0 sudo apt-get install git sudo apt-get install maven echo "export JAVA_HOME=/usr/lib/jvm/java-8-oracle/" >> ~/.bashrc echo "export GIRAPH_HOME=~/giraph-1.2.0/" >> ~/.bashrc exit # open a new terminal, and continue cd wget http://ftp.jaist.ac.jp/pub/apache/giraph/giraph-1.2.0/giraph-dist-1.2.0-src.tar.bz2 tar xfvj giraph-dist-1.2.0-src.tar.bz2 cd giraph-1.2.0 mvn package -DskipTests cat << EOF > tiny_graph.txt [0,0,[[1,1],[3,3]]] [1,0,[[0,1],[2,2],[3,1]]] [2,0,[[1,2],[4,4]]] [3,0,[[0,3],[1,1],[4,4]]] [4,0,[[3,4],[2,4]]] EOF ~/hadoop-1.2.1/bin/hadoop dfs -copyFromLocal tiny_graph.txt /user/hduser/input/tiny_graph.txt ~/hadoop-1.2.1/bin/hadoop dfs -ls /user/hduser/input ~/hadoop-1.2.1/bin/hadoop jar $GIRAPH_HOME/giraph-examples/target/giraph-examples-1.2.0-for-hadoop-1.2.1-jar-with-dependencies.jar org.apache.giraph.GiraphRunner org.apache.giraph.examples.SimpleShortestPathsComputation -vif org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexInputFormat -vip /user/hduser/input/tiny_graph.txt -vof org.apache.giraph.io.formats.IdWithValueTextOutputFormat -op /user/hduser/output/shortestpaths -w 1 ~/hadoop-1.2.1/bin/hadoop dfs -cat /user/hduser/output/shortestpaths/* # If you see output like the following (the shortest-path length for every vertex), Giraph is working correctly. # # 0 1.0 # 1 0.0 # 2 2.0 # 3 1.0 # 4 5.0