You are on page 1 of 4

How to install Nutch 0.7.

1) JAVA Setup

Reference : http://java.sun.com/j2se/1.5.0/install-linux.html

cd /usr/local

Download JDK 5.0 Update 7 (the jdk-1_5_0_07-linux-i586.bin file) from

http://java.sun.com/javase/downloads/index.jsp

chmod 755 jdk-1_5_0_07-linux-i586.bin

./jdk-1_5_0_07-linux-i586.bin

export PATH=/usr/local/jdk1.5.0_07/bin/:$PATH
export CLASSPATH=.

vi /etc/profile

#Add the following at the end of /etc/profile just after export PATH.
export JAVA_HOME=/usr/local/jdk1.5.0_07

2) Tomcat Setup

cd /tmp
wget http://apache.forbigweb.com/tomcat/tomcat-5/v5.5.17/bin/apache-tomcat-5.5.17.tar.gz

tar zxvf apache-tomcat-5.5.17.tar.gz

mv apache-tomcat-5.5.17 /usr/share/tomcat5

cp -a /usr/share/tomcat5/conf/server.xml /usr/share/tomcat5/conf/server.xml.orig

To support multi-lingual search (Chinese, etc.), add the attribute
URIEncoding="UTF-8" to the <Connector> element in server.xml, as shown in the example below:

****
<Connector port="8080"
maxThreads="50" minSpareThreads="5" maxSpareThreads="15"
disableUploadTimeout="true"
URIEncoding="UTF-8"/>

******

3) Nutch Setup

Reference: http://lucene.apache.org/nutch/tutorial.html

cd /tmp

wget http://apache.roweboat.net/lucene/nutch/nutch-0.7.2.tar.gz

tar zxvf nutch-0.7.2.tar.gz

mv nutch-0.7.2 /usr/local/nutch
cd /usr/local/nutch
vi urls

#add the line below


http://ing.clients.megaesecure.com

cd /usr/local/nutch/conf

cp -a crawl-urlfilter.txt crawl-urlfilter.txt.orig

vi crawl-urlfilter.txt
#Replace *MY.DOMAIN.NAME with your site url

cd /usr/local/nutch

bin/nutch crawl urls -dir crawl.test -depth 3 >& crawl.log

cd /usr/share/tomcat5/webapps

rm -rf ROOT*

cd /usr/local/nutch

cp nutch*.war /usr/share/tomcat5/webapps/ROOT.war

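# Start Tomcat from the crawl directory: the Nutch webapp looks for its
# index in ./segments, relative to the directory Tomcat is started from.
cd /usr/local/nutch/crawl.test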

/usr/share/tomcat5/bin/catalina.sh start

Then visit http://localhost:8080/

4) Connecting Tomcat with Apache

References:
http://www.meritonlinesystems.com/docs/apache_tomcat_redhat.html
http://tomcat.apache.org/connectors-doc/howto/apache.html

Using yum, install the following RPMs if they are not already installed:

* libtool
* automake
* autoconf

# Download mod_jk

cd /tmp

wget http://apache.mirrors.redwire.net/tomcat/tomcat-connectors/jk/source/jk-1.2.18/tomcat-connectors-1.2.18-src.tar.gz

tar zxvf tomcat-connectors-1.2.18-src.tar.gz

cd /tmp/tomcat-connectors-1.2.18-src/native

./buildconf.sh
./configure --with-apxs=/usr/local/apache2/bin/apxs

make

cp /tmp/tomcat-connectors-1.2.18-src/native/apache-2.0/mod_jk.so /usr/local/apache2/modules/

cd /usr/local/apache2/conf/

vi workers.properties

# Copy and paste the following lines into the workers.properties file

# workers.properties - ajp13
#
# List workers
worker.list=wrkr
#
ps=/
workers.tomcat_home=/usr/share/tomcat5
workers.java_home=/usr/local/jdk1.5.0_07
# Define wrkr
worker.wrkr.port=8009
worker.wrkr.host=localhost
worker.wrkr.type=ajp13
worker.wrkr.cachesize=10
worker.wrkr.cache_timeout=600
worker.wrkr.socket_timeout=300

chmod 744 workers.properties

vi /usr/local/apache2/conf/httpd.conf

# Add the following to the bottom of the existing LoadModule directives in the
# Global Environment section:

LoadModule jk_module modules/mod_jk.so

# Add the following to the bottom of the Main Server Configuration section:

JkWorkersFile "/usr/local/apache2/conf/workers.properties"
JkLogFile "/var/log/httpd/mod_jk.log"
JkLogLevel info
JkLogStampFormat "[%a %b %d %H:%M:%S %Y]"

#Set up a Virtual Host directive in the Virtual Hosts section of httpd.conf.

<VirtualHost *:80>
ServerAdmin server-manager@megaesecure.com
ServerName ing.clients.megaesecure.com
Alias /ROOT /usr/share/tomcat5/webapps/ROOT
DocumentRoot /usr/share/tomcat5/webapps/ROOT
ErrorLog /usr/share/tomcat5/logs/ing.clients.megaesecure.com_error_log
CustomLog /usr/share/tomcat5/logs/ing.clients.megaesecure.com_access_log common
JkMount /*.jsp wrkr
# JkMount /servlet/* ROOT
# Deny direct access to WEB-INF
<LocationMatch ".*WEB-INF.*">
AllowOverride None
deny from all
</LocationMatch>
</VirtualHost>

# Restart Tomcat

cd /usr/local/nutch/crawl.test

/usr/share/tomcat5/bin/catalina.sh stop

# Ensure Tomcat is stopped by running the following command


ps aux | grep tomcat

/usr/share/tomcat5/bin/catalina.sh start

# Restart Apache

/etc/rc.d/init.d/httpd restart

You might also like