Diffstat (limited to 'lib')
-rw-r--r--  lib/gorg/base.rb             602
-rw-r--r--  lib/gorg/cache.rb            493
-rwxr-xr-x  lib/gorg/cgi-bin/gorg.cgi     45
-rwxr-xr-x  lib/gorg/cgi-bin/search.cgi   50
-rw-r--r--  lib/gorg/cgi.rb              198
-rwxr-xr-x  lib/gorg/fcgi-bin/gorg.fcgi   61
-rw-r--r--  lib/gorg/log.rb               56
-rw-r--r--  lib/gorg/search.rb           444
-rw-r--r--  lib/gorg/www.rb              207
9 files changed, 2156 insertions, 0 deletions
diff --git a/lib/gorg/base.rb b/lib/gorg/base.rb new file mode 100644 index 0000000..c3851a9 --- /dev/null +++ b/lib/gorg/base.rb @@ -0,0 +1,602 @@ +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with Foobar; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +module Gorg + Version = "0.6" +end + +# Some required stuff for gorg +require 'time' + +require 'gorg/xsl' +require 'gorg/log' +require 'gorg/cache' +require 'timeout' +require 'cgi' +require 'stringio' +require 'zlib' +require 'ipaddr' + + +module Gorg + + def xproc(path, params, list=false, printredirect=false) + # Process file through xslt passing params to the processor + # path should be the absolute path of the file, i.e. not relative to DocumentRoot + # + # Since 0.4, path can also be a string containing + # the actual xml to be processed + # + # Use default stylesheet if none can be found in the file + # Return a list of files read by the processor (useful to do caching) if requested + # + # Return an error condition and, hopefully, some useful output + # Do not raise any exception + # In most cases, an error will result in no output but + # the xslt processor can consider some errors as warnings and + # return the best result it could come up with along with a warning + # e.g. if a file used in a document() function cannot be found, + # the xslt processor will return some output and a warning. + # It's up to the caller to decide whether to use the output or b0rk + # + # The return value is an array of 2 to 4 items: [{}, "", [[]], []] + # 1. hash with error information, its keys are + # 1.a "xmlErrCode" 0 is no error, -9999 means an exception has been raised in this block (unlikely), + # anything else is an error code (see /usr/include/libxml2/libxml/xmlerror.h) + # 1.b "xmlErrLevel" again, from libxml2, 0==OK, 1==Warning, 2==Error, 3==Fatal + # 1.c "xmlErrLevel" again, from libxml2, some explanation about what went wrong + # 2. output from xsltprocessor (or error message from a raised exception) + # 3. list of files that the xslt processor accessed if the list was requested, + # paths are absolute, i.e. not relative to your docroot. + # Each entry is an array [access type, path] with access_type being + # "r" for read, "w" for written (with exsl:document) or "o" for other (ftp:// or http://) + # 4. array of CGI::Cookie to be sent back + # + # Examples: [{"xmlErrMsg"=>"blah warning blah", "xmlErrCode"=>1509, "xmlErrLevel"=>1}, "This is the best XSLT could do!", nil] + # [{"xmlErrCode"=>0}, "Result of XSLT processing. Well done!", ["/etc/xml/catalog","/var/www/localhost/htdocs/doc/en/index.xml","/var/www/localhost/htdocs/dtd/guide.dtd"]] + + xsltproc = Gorg::XSL.new + xsltproc.xroot = $Config["root"] + # Grab strings from xsl:message + xslMessages = [] + # Does the caller want a list of accessed files? 
+ xsltproc.xtrack = list; filelist = Array.new + # Process .xml file with stylesheet(s) specified in file, or with default stylesheet + xsltproc.xml = path + # Look for stylesheet href (there can be more than one) + regexp = Regexp.new('<\?xml-stylesheet.*href="([^"]*)".*') + l = $Config["headXSL"] ; styles = Array.new + if FileTest.file?(path) then + # Path is indeed a file name + IO.foreach(path) { |line| + styles << $1 if regexp.match(line) + break if (l-=1) == 0 + } + else + # Scan xml for stylesheet names + path.each { |line| styles << $1 if regexp.match(line) } + end + # Use default stylesheet if none were found in the doc + styles << $Config["defaultXSL"] if styles.length == 0 + # Add params, we expect a hash of {param name => param value,...} + xsltproc.xparams = params + # Process through list of stylesheets + firstErr = {} + while xsltproc.xsl = styles.shift + xsltproc.process + filelist += xsltproc.xfiles if xsltproc.xtrack? + # Break and raise 301 on redirects + xsltproc.xmsg.each { |r| + if r =~ /Redirect=(.+)/ then + if printredirect then + STDERR.puts "Location: #{$1}" + else + raise Gorg::Status::MovedPermanently.new($1) + end + end + } + xslMessages += xsltproc.xmsg + # Remember 1st warning / error + firstErr = xsltproc.xerr if firstErr["xmlErrLevel"].nil? && xsltproc.xerr["xmlErrLevel"] > 0 + # B0rk on error, an exception should have been raised by the lib, but, er, well, you never know + break if xsltproc.xerr["xmlErrLevel"] > 1 + xsltproc.xml = xsltproc.xres + end + # Keep 1st warning / error if there has been one + firstErr = xsltproc.xerr if firstErr["xmlErrLevel"].nil? + # Return values + [ firstErr, xsltproc.xres, (filelist.uniq if xsltproc.xtrack?), xslMessages ] + rescue => ex + if ex.respond_to?(:errCode) then + # One of ours (Gorg::Status::HTTPStatus) + # Propagate exception + raise + else + debug "in xproc exception handler: #{ex.inspect} // #{xsltproc.xerr.inspect}" + # Return exception message and an error hash as expected from the xslt processor + # Use error codes that the xslt lib might have returned + [ if (xsltproc.xerr["xmlErrCode"]||-1) == 0 then + { "xmlErrMsg" => ex.to_s, + "xmlErrCode" => 9999, + "xmlErrLevel" => 3 + } + else + { "xmlErrMsg" => xsltproc.xerr["xmlErrMsg"] || ex.to_s, + "xmlErrCode" => xsltproc.xerr["xmlErrCode"], + "xmlErrLevel" => xsltproc.xerr["xmlErrLevel"] + } + end , + ex.to_s, + (filelist.uniq if xsltproc.xtrack?) 
+ ] + end + end + + # HTTP status codes and html output + module Status + class HTTPStatus < StandardError + def html(err="") + <<-EOR +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> +<HTML> +<HEAD><TITLE>#{errSts}</TITLE></HEAD> +<BODY> +<H1>#{errLabel}</H1> +<font color="#FF0000">#{err}</font> +<HR> +</BODY> +</HTML> + EOR + end + def errSts + "#{errCode} #{errLabel}" + end + # Default is unknown error + def errLabel + "Undefined Error" + end + def errCode + 999 + end + def header + {'Status' => errSts} + end + end + + class NotModified < HTTPStatus + def initialize(stat) + # 304 needs to send ETag and Last-Modified back + @mstat=stat + end + def header + {'Last-Modified' => @mstat.mtime.httpdate.dup, 'ETag' => makeETag(@mstat).dup}.merge(super) + end + def html + "" + end + def errLabel + "Not Modified" + end + def errCode + 304 + end + end + + class MovedPermanently < HTTPStatus + def initialize(loc) + # 301 needs to send Location: + @location=loc + end + def errLabel + "Moved Permanently" + end + def errCode + 301 + end + def header + {'Location' => @location}.merge(super) + end + def html + # RFC says "should" not "must" add a body + "" + end + def html301 # Not used + <<-EO301 +<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> +<html><head> +<title>301 Moved Permanently</title> +</head><body> +<h1>Moved Permanently</h1> +<p>The document has moved <a href="#{@location}">here</a>.</p> +</body></html> + EO301 + end + end + + class Forbidden < HTTPStatus + def errLabel + "Forbidden" + end + def errCode + 403 + end + end + + class NotFound < HTTPStatus + def errLabel + "Not Found" + end + def errCode + 404 + end + end + + class NotAllowed < HTTPStatus + def errLabel + "Method Not Allowed" + end + def header + {'Allow'=>'GET,HEAD'}.merge(super) + end + def errCode + 405 + end + end + + class SysError < HTTPStatus + def errLabel + "Internal Server Error" + end + def errCode + 500 + end + end + end #Status module + + + def gorgInit + # Initialize gorg, i.e. read config file, init cache, ... + # Simply build a hash of params => value in a global variable called $Config + + # Set up default values + $Config = { "AppName" => "gorg", # Used for syslog entries, please keep 'gorg' (cannot be changed in config file) + "root" => nil, # No root dir by default (cgi uses DOCUMENT_ROOT from its environment) + "port" => 8000, # Used for stand-alone web server (WEBrick) + "headXSL" => 12, # Only read 12 lines in xml files to identify required stylesheets + "defaultXSL" => nil, # No default stylesheet, how could I guess? + "cacheDir" => nil, # No cache by default. Directory must exist and be writable. + "cacheTTL" => 0, # Number of seconds after which a document is considered too old, 0=never + "cacheSize" => 40, # in MegaBytes, max size of cache, used when autocleanig + "zipLevel" => 2, # Compresion level used for gzip support (HTTP accept_encoding) (0-9, 0=none, 9=max) + "maxFiles" => 9999, # Max number of files in a single directory in the cache tree + "cacheTree" => 0, # Use same tree as on site in cache, 0 = disabled + "cacheWash" => 0, # Clean cache automatically and regularly when a store into the cache occurs. 0 = disabled + # gorg cleans up if random(param_value) < 10. It will only clean same dir it caches to, not whole tree. + # i.e. 
a value<=10 means at every call (not a good idea), 100 means once/10 stores, 1000 means once/100 stores + "logLevel" => 4, # INFO, be slightly verbose by default (messages go to syslog) OFF, FATAL, ERROR, WARN, INFO, DEBUG = 0, 1, 2, 3, 4, 5 + "passthru" => true, # Allow return of requested file without processing it if passthru="anything but 0" is passed + "acceptCookies" =>false,# Allow cookies in & out of transforms + "linkParam" => "link", # Pass pathname of requested file in 'link' param to xsl transform + "HTTP_HOST" => nil, # Pass host value from HTTP header to xsl transform + "accessLog" => "syslog",# or a filename or STDERR, used to report hits from WEBrick, not used by cgi's + "autoKill" => 0, # Only used by fastCGI, exit after so many requests (0 means no, <=1000 means 1000). Just in case you fear memory leaks. + "in/out" => [], # (In/Ex)clude files from indexing + "mounts" => [], # Extran mounts for stand-alone server + "listen" => "127.0.0.1" # Let webrick listen on given IP + } + # Always open syslog + @syslog = Gorg::Log::MySyslog.new($Config["AppName"]) + $Log = Gorg::Log::MyLog.new(@syslog, 5) # Start with max + + # Check for config file + configf = ENV["GORG_CONF"]||"/etc/gorg/gorg.conf" + raise "Cannot find config file (#{configf})" unless FileTest.file?(configf) and FileTest.readable?(configf) + file = IO.read(configf) + parseConfig($Config, file) + + # Init cache + Cache.init($Config) if $Config["cacheDir"] + + # Set requested log level + $Log.level = $Config["logLevel"] + rescue + error("Gorg::init failed: #{$!}") + STDERR.puts("Gorg::init failed: #{$!}") + exit(1) + end + + def scanParams(argv) + # Scan argv for --param paramName paramValue sequences + # params are removed from argv + # Return a hash of {"name" => "value"} + h = Hash.new + while idx = argv.index('--param') + break if argv.length <= idx+2 # We need at least 2 more args after --param + argv.delete_at(idx) # Remove --param from argv + name = argv.delete_at(idx) # Remove param name from argv + value = argv.delete_at(idx) # Remove param value from argv + h[name] = value # Add entry in result + end + + h if h.length > 0 + end + + private + def parseConfig(h, config) + config.each {|line| + line.strip! + next if line.length == 0 or line[0,1] == '#' # Skip blank lines and comments + raise "Invalid Configuration (#{line})" unless line =~ /^([a-zA-Z_]*)\s*=\s*/ + param = $1 + value = $' + # If value starts with ' or ", it ends with a similar sign and does not accept any in the value, no escaping... We keep it simple + # otherwise, it ends with EOL or first space + if value =~ /["'](.*)['"]/ then + value = $1 + end + value.strip! + raise "No value for #{param}" unless value.length > 0 + # Check param / value (only syntactical checks here) + case param.downcase + when "root" + h["root"] = value + when "port" + h["port"] = value.to_i + when "passthru" + h["passthru"] = value.squeeze != "0" + when "acceptcookies" + h["acceptCookies"] = value.squeeze == "1" + when "linkparam" + if value =~ /^\s*([a-zA-Z]+)\s*$/ then + h["linkParam"] = $1 + else + h["linkParam"] = nil + end + when "httphost" + hosts = value.squeeze(" ") + case hosts + when /^0?$/ + hh = nil + when "*" + hh = ["*"] + else + hh = hosts.split(" ") + # Add IPs + hosts.split(" ").each { |ho| + begin + hh += TCPSocket.gethostbyname(ho)[3..-1] if ho != '*' + rescue + # Ignore + nil + end + } + hh.uniq! 
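+ # Illustrative sketch (hostname and IP are hypothetical): a config line such
+ # as 'httphost = www.example.org' leaves hh == ["www.example.org", "192.0.2.10"]
+ # after the address lookup above.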
+ end + h["httphost"] = hh + when "headxsl" + h["headXSL"] = value.to_i + when "defaultxsl" + h["defaultXSL"] = value + when "cachedir" + h["cacheDir"] = value + when "cachettl" + h["cacheTTL"] = value.to_i + when "cachesize" + h["cacheSize"] = value.to_i + when "maxfiles" + h["maxFiles"] = value.to_i + when "cachetree" + h["cacheTree"] = value.squeeze != "0" + when "ziplevel" + if value =~ /^\s*([0-9])\s*$/ then + h["zipLevel"] = $1.to_i + else + h["zipLevel"] = 2 + end + when "cachewash" + h["cacheWash"] = value.to_i + when "loglevel" + h["logLevel"] = value.to_i + when "accesslog" + h["accessLog"] = value + when "autokill" + h["autoKill"] = value.to_i + when "listen" + begin + ip = IPAddr.new(value) + h["listen"] = ip.to_s + rescue + h["listen"] = "127.0.0.1" + end + when "dbconnect" + h["dbConnect"] = value + when "dbuser" + h["dbUser"] = value + when "dbpassword" + h["dbPassword"] = value + when "exclude" + h["in/out"] << [false, Regexp.new(value)] + when "include" + h["in/out"] << [true, Regexp.new(value)] + when "fpath_to_lang" + h["flang"] = Regexp.new(value) + when "xpath_to_lang" + h["xlang"] = value + when "mount" + if value =~ /^([^\s]+)\s+ON\s+(.+)$/i then + h["mounts"] << [$1, $2] + end + else + raise "Unknown parameter (#{param})" + end + } + rescue + raise "Could not parse config file: #{$!}" + end + + # Utilities + def contentType(aMsg) + # Find the Content-Type=xxx/yyy line in aMsg + # from the Meta file in the cache + ct = nil + aMsg.each { |s| + if s =~ /^Content-Type:(.+)$/ then + ct = $1 + break + end + } + ct + end + + def setContentType(data) + # Set content-type according to x(ht)ml headers + charset = nil + if data =~ /^<\?xml .*encoding=['"](.+)['"]/i then + charset = $1 if $1 + # XML / XHTML + if data[0..250] =~ /^<\!DOCTYPE\s+html/i then + # XHTML + ct = 'application/xhtml+xml' + else + # XML + ct = 'text/xml' + end + if charset then + ct << "; charset=#{charset}" + end + elsif data =~ /^<\!DOCTYPE\s+html\sPUBLIC\s(.+DTD XHTML)?/i then + # (X)HTML + if $1 then + # XHTML + ct = 'application/xhtml+xml' + else + # HTML + ct = 'text/html' + end + elsif data =~ /<html/i then + # HTML + ct = 'text/html' + else + # TXT + ct = 'text/plain' + end + ct + end + + def makeCookies(aMsg) + # Make an array of CGI::Cookie objects + # msg is expected to be an array of strings like 'Set-Cookie(name)value=param' + # (output by the xsl transform with xsl:message) + cookies = Hash.new + aMsg.each { |s| + if s =~ /^Set-Cookie\(([^\)]+)\)([a-zA-Z0-9_-]+)=(.+)$/ then + # $1 = cookie name $2 = key $3 = value + if cookies.has_key?($1) then + cookies[$1] << "#{$2}=#{$3}" + else + cookies[$1] = ["#{$2}=#{$3}"] + end + end + } + if cookies.length > 0 then + # Make CGI::Cookie objects + cookies.map { |k,v| + CGI::Cookie.new('name' => k, 'value' => v, 'expires' => Time.now + 3600*24*30) + } + else + nil + end + end + + def cookies_to_params(cookies) + # Turn array of CGI::Cookie objects into a Hash of key=>value + # cookies is a hash, forget the keys, + # each value should be an array of strings, each string should be like 'param=value' + h = {} + cookies.values.each { |v| + if v.class==String and v =~ /^([a-zA-Z0-9_-]+)=(.+)$/ then + h[$1] = $2 + elsif v.class==Array then + v.each { |vv| + if vv.class==String and vv =~ /^([a-zA-Z0-9_-]+)=(.+)$/ then + h[$1] = $2 + end + } + elsif v.class==CGI::Cookie then + v.value.each { |vv| + if vv.class==String and vv =~ /^([a-zA-Z0-9_-]+)=(.+)$/ then + h[$1] = $2 + end + } + end + } + h + rescue + error "Could not parse cookies (#{$!}) " + {} + end + + 
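+ # Usage sketch for the conditional-request helpers below (all values are
+ # hypothetical, for illustration only):
+ #
+ #   st    = File.stat("/var/www/htdocs/index.xml")
+ #   etag  = makeETag(st)                # => e.g. '"1a2b-43cd9e60"' (hex size-mtime)
+ #   etags = split_header_etags(%Q{"1a2b-43cd9e60", "0-0"})
+ #   notModified?(st, etags, st.mtime)   # => true: the ETag matches and the
+ #                                       #    file is unchanged since mtime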
def notModified?(fstat, etags, ifmodsince) + # Decide whether file has been modified according to either etag, last mod timestamp or both + # If both If-None-Match and If-Modified-Since request header fields are present, + # they have to be tested both + res = false + if fstat then + a = etags.to_a + if ifmodsince && etags then + res = (ifmodsince >= fstat.mtime) && (a.include?(makeETag(fstat)) || a.include?('*')) + elsif etags + res = a.include?(makeETag(fstat)) || a.include?('*') + elsif ifmodsince + res = ifmodsince >= fstat.mtime + end + end + # Return result + res + end + + def split_header_etags(str) + # Split header values expected as "value1", "value2", ... into an array of strings + str.scan(/((?:"(?:\\.|[^"])+?"|[^",]+)+)(?:,\s*|\Z)/xn).collect{|v| v[0].strip } + end + + def makeETag(st) + # Format file stat object into an ETag using its size & mtime + # Parameter can either be a filename or a stat object + st = File.stat(st) unless st.respond_to?(:ino) + sprintf('"%x-%x"', st.size, st.mtime.to_i) + end + + def gzip(data, level) + gz = "" + io = StringIO.new(gz) + gzw = Zlib::GzipWriter.new(io, level) + gzw.write data + gzw.close + gz + end + + def gunzip(data) + io = StringIO.new(data) + gzw = Zlib::GzipReader.new(io) + gunz = gzw.read + gzw.close + gunz + end + +end diff --git a/lib/gorg/cache.rb b/lib/gorg/cache.rb new file mode 100644 index 0000000..543b6a2 --- /dev/null +++ b/lib/gorg/cache.rb @@ -0,0 +1,493 @@ +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with Foobar; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +# Cache a bit of data based on +# . a path name as received by a webserver e.g. +# . a list of parameters as received by a webserver e.g. +# . a list of files it depends on + +require "parsedate" +require "fileutils" +require "find" +require "digest" +require "digest/md5" + +module Gorg + +CacheStamp = "Gorg-#{Gorg::Version} Cached This Data. Do not alter this file. Thanks." + +module Cache + def Cache.init(config) + @@lockfile = ".cache.cleaner.lock" + @cacheDir = nil + if FileTest.directory?(config["cacheDir"]) + if FileTest.writable?(config["cacheDir"]) + @cacheDir = config["cacheDir"].chomp("/") + else + warn "Cache directory not writable" + end + else + warn "Invalid cache directory" + end + + # Time-To-Live in seconds, cached items older than that will be considered too old + @zipLevel = config["zipLevel"] + @zip = @zipLevel > 0 ? 
".gz" : "" + @ttl = config["cacheTTL"] + @cacheTree = config["cacheTree"] + @maxFiles = config["maxFiles"] # Max number of files in a single directory + @maxSize = config["cacheSize"]*1024*1024 # Now in bytes + @washNumber = config["cacheWash"] # Clean cache dir after a store operation whenever rand(@washNumber) < 10 + @lastCleanup = Time.new-8e8 # Remember last time we started a cleanup so we don't pile them up + end + + def Cache.hit(objPath, objParam={}, etags=nil, ifmodsince=nil) + # objPath is typically a requested path passed from a web request but it + # can be just any string. It is not checked against any actual files on the file system + # + # objParam is expected to be a hash or any object whose iterator yields two values + # + # 2 filenames are built with the arguments and should give + # the name of a metafile and a result file + # if the result file is older than @ttl seconds, hit fails + # The metafile is then checked for dependencies + # It contains a list of filenames along with their size and mtime separated by ;; + + # etag and ifmodsince are used in a webserver context + # etag is defined if an ETag was part of an If-None-Match request field + # etag can be an array or a single string + # If the current ETag of the meta file matches, no data is returned (webserver should return a 304) + # + # ifmodsince is a time object passed on an If-Modified-Since request field + # If the creation date of the meta file is earlier, no data is returned (webserver should return a 304) + + return nil if @cacheDir.nil? # Not initialized, ignore request + + # Reminder: filenames are full path, no need to prepend dirname + dirname, basename, filename, metaname = makeNames(objPath, objParam) + + raise "Cache subdir does not exist" unless FileTest.directory?(dirname) + + # Hit the cache + meta, mstat = IO.read(metaname), File.stat(metaname) if metaname && FileTest.file?(metaname) && FileTest.readable?(metaname) + raise "Empty/No meta file" if meta.nil? || meta.length < 1 + + fstat = File.stat(filename) if filename && FileTest.file?(filename) + raise "Empty/No data file" if fstat.nil? + + # Check the timestamps of files in the metadata + meta = meta.split("\n") + raise "I did not write that meta file" unless CacheStamp == meta.shift + mline = meta.shift + while mline and mline !~ /^;;extra meta$/ do + f, s, d = mline.split(";;") + if s.to_i < 0 + # File did not exist when cache entry was created + raise "Required file #{f} has (re)appeared" if FileTest.file?(f) && FileTest.readable?(f) + else + # File did exist when cache entry was created, is it still there? + raise "Required file #{f} has disappeared" unless FileTest.file?(f) && FileTest.readable?(f) + + fst = File.stat(f) + raise "Size of #{f} has changed from #{fst.size} to #{s.to_i}" unless fst.size == s.to_i + raise "Timestamp of #{f} has changed" unless Time.utc(*ParseDate.parsedate(d)) == fst.mtime.utc + end + mline = meta.shift + end + if mline =~ /^;;extra meta$/ then + extrameta = meta.dup + else + extrameta = [] + end + + if notModified?(fstat, etags, ifmodsince) and extrameta.join !~ /set-cookie/i + raise Gorg::Status::NotModified.new(fstat) + end + + file = IO.read(filename) if filename && FileTest.file?(filename) && FileTest.readable?(filename) + raise "Empty/No data file" if file.nil? 
|| file.length < 1 + + # Is the data file too old + raise "Data file too old" unless @ttl==0 or (Time.new - fstat.mtime) < @ttl + + # Update atime of files, ignore failures as files might have just been removed + begin + t = Time.new + File.utime(t, fstat.mtime, filename) + File.utime(t, mstat.mtime, metaname) + rescue + nil + end + + # If we get here, it means the data file can be used, return cache object (data, stat(datafile), extrameta) + # The file is left (un)compressed, it's returned as it was stored + [file, fstat, extrameta] + + rescue Gorg::Status::NotModified + # Nothing changed, should return a 304 + debug("Client cache is up-to-date") + raise + rescue + # cache hit fails if anything goes wrong, no exception raised + debug("Cache hit on #{objPath} failed: (#{$!})") + nil + end + + + def Cache.store(data, objPath, objParam={}, deps=[], extrameta=[]) + # Store data in cache so it can be retrieved based on the objPath and objParams + # deps should contain a list of files that the object depends on + # as returnd by our xsl processor, i.e. an array of [access_type, path] where + # access_type can be "r", "w", or "o" for recpectively read, write, other. + + # Define content-type + ct = setContentType(data) + extrameta << "Content-Type:#{ct}" + + return nil if @cacheDir.nil? # Not initialized, ignore request + + # Cache only if no remote objects (ftp:// or http://) in list of used files + if deps && deps.detect{|f| f[0] =~ /^o$/i } + debug "#{objPath} not cached because it needs remote resources" + return nil + end + + dirname, basename, filename, metaname = makeNames(objPath, objParam) + + FileUtils.mkdir_p(dirname) unless FileTest.directory?(dirname) + + # Write Meta file to a temp file (with .timestamp.randomNumber appended) + metaname_t = "#{metaname}.#{Time.new.strftime('%Y%m%d%H%M%S')}.#{rand(9999)}" + + # Data might need to be just a link to another .Data file + # if we find another requested path with different params but + # with identical MD5 sums + # Which is why we keep a ...xml.Data.[md5 sum] file without the parameters + # in its name that we can hard link to. + # e.g. A moron hits for 10 full handbooks with toto=1..10 in the URI, + # we'd end up with 10 identical large copies. With links we have only one + + # Old versions are expected to be cleaned up by the cacheWash() routine + # A Dir.glob() to find the previous ones would be too expensive + + # Compute MD5 digest + md5 = Digest::MD5.hexdigest(data) + + # Compress data if required + if @zipLevel > 0 then + bodyZ = data = gzip(data, @zipLevel) + else + bodyZ = nil + end + + # Set mtime of data file to latest mtime of all required files + # so that caching can work better because mtimes will be + # identical on all webnodes whereas creation date of data + # would be different on all nodes. 
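+ # (Time.now-8e8 below is roughly 25 years in the past: a sentinel guaranteed
+ # to be older than the mtime of any dependency file considered here.)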
+ maxmtime = Time.now-8e8 + fstat = nil + + begin + timeout(10){ + File.open("#{metaname_t}", "w") {|fmeta| + fmeta.puts(CacheStamp) + # Write filename;;size;;mtime for each file in deps[] + deps.each {|ffe| + ftype = ffe[0] + fdep = ffe[1] + if FileTest.file?(fdep) + s = File.stat(fdep) + fmeta.puts("#{fdep};;#{s.size};;#{s.mtime.utc};;#{ftype}") + maxmtime = s.mtime if s.mtime > maxmtime and ftype =~ /^r$/i + else + # A required file does not exist, use size=-1 and old timestamp + # so that when the file comes back, the cache notices a difference + # and no cache miss gets triggered as long as file does not exist + fmeta.puts("#{fdep};;-1;;Thu Nov 11 11:11:11 UTC 1971") + end + } + fmeta.puts ";;extra meta" + extrameta.each { |m| fmeta.puts m } + } + # Get exclusive access to the cache directory while moving files and/or creating data files + File.open(dirname) { |lockd| + while not lockd.flock(File::LOCK_NB|File::LOCK_EX) + # Timeout does not occur on a blocking lock + # Try a non-bloking one repeatedly for a few seconds until timeout occurs or lock is granted + # We are in a timeout block, remember + sleep 0.1 + end + # Remove previous Data + FileUtils.rm_rf(filename) + + # mv temp meta file to meta file + FileUtils.mv(metaname_t, metaname) + + # We keep a data file for the same requested path, with different params, + # but which ends up with same MD5 sum, i.e. identical results because of unused params + linkname = "#{basename}.#{md5}#{@zip}" + if FileTest.file?(linkname) then + # Data file already there, link to it + File.link(linkname, filename) + else + # Write data file and set its mtime to latest of all files it depends on + File.open("#{filename}", "w") {|fdata| fdata.write(data)} + # Create link + File.link(filename, linkname) + end + # mtime might need to be updated, or needs to be set + # e.g. when a dependency had changed but result files is identical + # This is needed to keep Last-Modified dates consistent across web nodes + File.utime(Time.now, maxmtime, filename) + fstat = File.stat(filename) + } + } + ensure + FileUtils.rm_rf(metaname_t) + end + + # Do we clean the cache? + washCache(dirname, 10) if @washNumber > 0 and rand(@washNumber) < 10 + + # Return stat(datafile) even if it's just been removed by washCache + # because another web node might still have it or will have it. + # Anyway, the cached item would be regenerated on a later request + # and a 304 would be returned if still appropriate at the time. + + # Return fstat of data file (for etag...) and zipped file + [fstat, bodyZ] + + rescue Timeout::Error, StandardError =>ex + if ex.class.to_s =~ /timeout::error/i then + warn("Timeout in cache store operation") + else + warn("Cache store error (#{$!})") + end + # Clean up before leaving + FileUtils.rm_rf(filename||"") + FileUtils.rm_rf(metaname||"") + nil # return nil so that caller can act if a failed store really is a problem + end + + + def Cache.washCache(dirname, tmout=30, cleanTree=false) + # Clean cache entries that are either too old compared to TTL (in seconds) + # or reduce total size to maxSize (in MB) + # oldDataOnly means to look only for unused *.Data.[md5] files that are not used anymore + # because file has been modified and has generated a new *.Data.[md5] file + + # timeout is the maximum time (in seconds) spent in here + + return nil if @cacheDir.nil? 
# Not initialized, ignore request + + # Also ignore request if dirname not equal to @cacheDir or under it + return nil unless dirname[0, @cacheDir.length] == @cacheDir + + # Also ignore request if dirname does not exist yet + return nil unless FileTest.directory?(dirname) + + # Also return if less than a minute has elapsed since latest cleanup + t0 = Time.new + return nil if t0 - @lastCleanup < 60 + + # Remember for next time + @lastCleanup = t0 + + Dir.chdir(dirname) { |d| + # Recreate lock file if it's been lost + unless File.exist?(@@lockfile) + File.open(@@lockfile, "w") { |lockf| lockf.puts("Lock file created on #{Time.now.utc} by gorg")} + end + + # Grab lockfile + File.open(@@lockfile) { |lockf| + if lockf.flock(File::LOCK_NB|File::LOCK_EX) then + infoMsg = "Cleaning up cache in #{dirname} (cleanTree=#{cleanTree}, tmout=#{tmout})" + info(infoMsg) + puts infoMsg if cleanTree + + timeout(tmout) { + totalSize, deletedFiles, scannedDirectories = washDir(dirname, cleanTree) + if totalSize >= 0 then + # Size == -1 means dir was locked, throwing an exception would have been nice :) + infoMsg = if cleanTree then + "Cache in #{dirname} is now #{totalSize/1024/1024} MB, #{deletedFiles} files removed in #{(Time.now-t0).to_i} seconds in #{scannedDirectories} directories" + else + "#{deletedFiles} files removed in #{(Time.now-t0).to_i} seconds in #{dirname}" + end + info(infoMsg) + puts infoMsg if cleanTree + end + } + else + # Locked dir, another process is busy cleaning up/ + debug("#{dirname} locked, skipping") + puts("#{dirname} locked, skipping") if cleanTree + end # of lock test + } # end of File.open(@@lockfile), close & release lock automatically + } + rescue Timeout::Error + info("Timeout while cleaning #{dirname}") + puts("Timeout while cleaning #{dirname}") if cleanTree + rescue StandardError =>ex + error("Error while cleaning cache: #{ex}") + puts("Error while cleaning cache: #{ex}") if cleanTree + end + + + private + + def Cache.washDir(dirname, cleanTree) + # Clean up cache starting from dirname and in subdirectories if cleanTree is true + # Return [newSize in bytes, # deleted files, # scanned directories] + size = nDeleted = nDirectories = 0 + + Dir.chdir(dirname) { |d| + hIno = Hash.new(0) # hash of file inodes with more than one link + lst = Array.new # array of file names, atime, ... + ttl = @ttl + ttl = 8e8 if ttl == 0 # No ttl, keep very old docs! + + # Get list of files sorted on their dirname+atime + Find.find('.') { |f| + begin + unless f =~ /^\.$|#{@@lockfile}/ # ignore "." and lockfile + ff = File.stat(f) + if ff.directory? then + Find.prune unless cleanTree + elsif ff.file? 
and f =~ /Meta|Data/ then + hIno[ff.ino] = ff.nlink if ff.nlink > 1 + # List of files has [name, atime, size, # links, inode] + lst << [f, ff.atime, ff.size, ff.nlink, ff.ino] + end + end + rescue + nil # File.stat can fail because file could have been deleted, ignore error + end + } + + # Compute total size + size = lst.inject(0){ |tot, a| tot + if a[3] > 0 then a[2]/a[3] else 0 end } + + # Delete old *.Data.[md5] files that are not being referenced anymore/ + lst.each { |a| + if a[3] == 1 && a[0] =~ /\.Data\.[0-9a-f]+(.gz)?$/ then + # Data file with no more links pointing to it + FileUtils.rm_rf(a[0]) + nDeleted += 1 + size -= a[2] + a[3] = 0 # Mark as deleted + end + } + + # Sort all files on atime + lst.sort!{ |a1, a2| a1[1] <=> a2[1] } + + t0 = Time.new + # Clean until size < maxSize _AND_ atime more recent than TTL + lst.each { |a| + break if size < @maxSize and t0-a[1] < ttl + next if a[3] < 1 # Already deleted in previous step + FileUtils.rm_rf(a[0]) + nDeleted += 1 + # Total size -= file size IF last link to data + if a[3] == 1 || hIno[a[4]] <= 1 then + size -= a[2] + end + hIno[a[4]] -= 1 if hIno[a[4]] > 0 + a[3] = 0 # Mark as deleted by setting nlinks to 0 + } + + # Remove deleted files from array + lst.reject! { |a| a[3] < 1 } + + + # Sort files per directory to enforce maxFiles + if cleanTree then + # Split the array in an array per directory + # and keep the files sorted on atime in each directory + slst = Hash.new + lst.length.times { + a = lst.shift + d = File.dirname(a[0]) + if slst[d] then + slst[d] << a + else + slst[d] = [a] + end + } + else + # If not cleaning whole tree, we have only a single dir + slst = {"." => lst} + end + + nDirectories = slst.length + + slst.each { |d, lst| + # Remove oldest files so that we have less than @maxFiles in it + if lst.length >= @maxFiles then + # Remove to leave up 90% of #maxFiles so we don't clean up only a handful of files repeatedly + (lst.length - 9*@maxFiles/10).times { + if a = lst.shift then + FileUtils.rm_rf(a[0]) + nDeleted += 1 + # Total size -= file size IF last link to data + if a[3] == 1 || hIno[a[4]] <= 1 then + size -= a[2] + end + hIno[a[4]] -= 1 if hIno[a[4]] > 0 + end + } + end + } + } #end of chdir + [size, nDeleted, nDirectories] + end + + + def Cache.makeNames(obj, params) + # Build meta filename and data filename from arguments + # + # obj is broken into a path and a filename with appended params + # e.g. /proj/en/index.xml?style=printable becomes /proj/en and index.xml+printable+yes + # or .#proj#en#index.xml+printable+yes + # depending on cacheTree param value + + # .Meta and .Data are appended respectively to the meta filename and data filename + # Base is the filename without appending params, e.g. .#proj#en#index.xml.Data + if @cacheTree then + # Use a path and a file + dir = "#{@cacheDir}#{File.dirname(obj)}" + base = f = File.basename(obj) + else + # Convert full path into a single filename + dir = @cacheDir + base = f = ".#{obj.gsub(/\//,'#')}" + end + + f = "#{f}+#{params.reject{|k,v| k.nil?}.sort.join('+')}" if params && params.to_a.length > 0 + # Remove funky chars and squeeze duplicates into single chars + f = f.gsub(/[^\w\#.+_-]/, "~").squeeze("~.#+") + + # Return names for Data and Meta files, and just the filepath (e.g. 
#proj#en#index.xml) + [dir, "#{dir}/#{base}.Data", "#{dir}/#{f}.Data#{@zip}", "#{dir}/#{f}.Meta"] + end +end + +end diff --git a/lib/gorg/cgi-bin/gorg.cgi b/lib/gorg/cgi-bin/gorg.cgi new file mode 100755 index 0000000..3c75dbc --- /dev/null +++ b/lib/gorg/cgi-bin/gorg.cgi @@ -0,0 +1,45 @@ +#! /usr/bin/ruby + +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with Foobar; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +require 'cgi' + +require 'gorg/cgi' + +if ARGV.length == 1 and ['-F', '--filter'].include?(ARGV[0]) then + # cgi does not accept any params like gorg, + # Only test on -F or --filter being there and nothing else + do_Filter unless STDIN.tty? +else + # Make CGI's env public to get access to REQUEST_URI and DOCUMENT_ROOT + class CGI + public :env_table + end + + include Gorg + + # Config file is named in env var. GORG_CONF, or possibly REDIRECT_GORG_CONF + ENV["GORG_CONF"] = ENV["GORG_CONF"]||ENV["REDIRECT_GORG_CONF"] + + gorgInit + STDERR.close + + cgi = CGI.new + do_CGI(cgi) +end diff --git a/lib/gorg/cgi-bin/search.cgi b/lib/gorg/cgi-bin/search.cgi new file mode 100755 index 0000000..396001e --- /dev/null +++ b/lib/gorg/cgi-bin/search.cgi @@ -0,0 +1,50 @@ +#! /usr/bin/ruby + +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with gorg; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +require 'cgi' +require 'gorg/search' + +# Make CGI's env public to get access to REQUEST_URI and DOCUMENT_ROOT +class CGI + public :env_table +end + +include Gorg + +# Config file is named in env var. 
GORG_CONF, or possibly REDIRECT_GORG_CONF +# ENV["PATH"] is used as a dirty hackish workaround a limitation of +# webrick's cgi handler: environment variables can't be passed to cgi's +# (REDIRECT_)GORG_CONF should be defined when running cgi's under apache +ENV["GORG_CONF"] = ENV["GORG_CONF"]||ENV["REDIRECT_GORG_CONF"]||ENV["PATH"] + +gorgInit +cgi = CGI.new + +# Params +# +# l = language code, no param will default to en, empty param defaults to any) +# q = query string +# p = page number in search result (0 < p < 1e6) +# s = page size (9 < p < 120) +# b = boolean search (y|Y|1 means yes, anything else no) + +gs = GDig::GSearch.new +gs.do_CGI(cgi) diff --git a/lib/gorg/cgi.rb b/lib/gorg/cgi.rb new file mode 100644 index 0000000..dfe8451 --- /dev/null +++ b/lib/gorg/cgi.rb @@ -0,0 +1,198 @@ +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with Foobar; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +# Process CGI request, either from cgi or fcgi + +require "gorg/base" + +module Gorg + def do_Filter(tmout=30, params=nil) + # Read STDIN, transform, spit result out + timeout(tmout) { + # Give it a few seconds to read it all, then timeout + xml = STDIN.read + err, body, filelist = xproc(xml, params, false, true) + if err["xmlErrLevel"] > 0 then + STDERR.puts("#{err.collect{|e|e.join(':')}.join("\n")}") + elsif (body||"").length < 1 then + # Some transforms can yield empty content + STDERR.puts("Empty body") + else + STDOUT.puts(body) + end + } + rescue Timeout::Error, StandardError =>ex + # Just spew it out + STDERR.puts(ex) + end + + def do_CGI(cgi) + header = Hash.new + if cgi.path_info.nil? || cgi.env_table["REQUEST_URI"].index("/#{File.basename($0)}/") + # Sorry, I'm not supposed to be called directly, e.g. 
/cgi-bin/gorg.cgi/bullshit_from_smartass_skriptbaby + raise Gorg::Status::Forbidden + elsif cgi.request_method == "OPTIONS" + cgi.out('Allow'=>'GET,HEAD'){""} + elsif cgi.request_method == "HEAD" or cgi.request_method == "GET" + # lighttp is b0rked despite what they say :( + # PATH_INFO == "" and PATH_TRANSLATED == nil + if cgi.path_info.length > 0 then + # Apache, or any web browser that works + path_info = cgi.path_info + else + # lighttp, use SCRIPT_NAME instead + path_info = cgi.env_table['SCRIPT_NAME'] + end + query = Hash.new + cgi.params.each{ |p, v| query[p] = v.to_s} + # Get DOCUMENT_ROOT from environment + $Config["root"] = cgi.env_table['DOCUMENT_ROOT'] + + xml_file = cgi.path_translated||(cgi.env_table['DOCUMENT_ROOT']+cgi.env_table['SCRIPT_NAME']) + if not FileTest.file?(xml_file) + # Should have been checked by apache, check anyway + raise Gorg::Status::NotFound + else + # Process request + # Parse If-None-Match and If-Modified-Since request header fields if any + inm=ims=nil + begin + inm = split_header_etags(cgi.env_table['HTTP_IF_NONE_MATCH']) if cgi.env_table['HTTP_IF_NONE_MATCH'] + ims = Time.parse(cgi.env_table['HTTP_IF_MODIFIED_SINCE']) if cgi.env_table['HTTP_IF_MODIFIED_SINCE'] + ims = nil if ims > Time.now # Dates later than current must be ignored + rescue + # Just ignore ill-formated data + nil + end + if $Config['passthru'] && query["passthru"] && query["passthru"] != "0" then + # passthru allowed by config and requested by visitor, return file as text/plain + debug("Passthru granted for #{path_info}") + mstat = File.stat(xml_file) + raise Gorg::Status::NotModified.new(mstat) if notModified?(mstat, inm, ims) + body = IO.read(xml_file) + header['type'] = 'text/plain' + # If client accepts gzip encoding and we support it, return gzipped file + if $Config["zipLevel"] > 0 and ( cgi.accept_encoding =~ /gzip(\s*;\s*q=([0-9\.]+))?/ and ($2||"1") != "0" ) then + body = gzip(body, $Config["zipLevel"]) + header['Content-Encoding'] = "gzip" + header['Vary'] = "Accept-Encoding" + end + else + # Get cookies and add them to the parameters + if $Config["acceptCookies"] then + # Add cookies to our params + query.merge!(cookies_to_params(cgi.cookies)) + end + + if $Config["httphost"] then + # Add HTTP_HOST to stylesheet params + query["httphost"] = if $Config["httphost"][0] == '*' then + cgi.host||"" + elsif $Config["httphost"].include?('*') then + $Config["httphost"][0] + elsif $Config["httphost"].include?(cgi.host) then + $Config["httphost"][0] + else + cgi.host||"" + end + end + + xml_query = query.dup # xml_query==params passed to the XSL, query=>metadata in cache + if $Config["linkParam"] then + xml_query[$Config["linkParam"]] = path_info + end + + bodyZ = nil # Compressed version + body, mstat, extrameta = Cache.hit(path_info, query, inm, ims) + if body.nil? 
then + # Cache miss, process file and cache result + err, body, filelist, extrameta = xproc(xml_file, xml_query, true) + if err["xmlErrLevel"] > 0 then + raise "#{err.collect{|e|e.join(':')}.join('<br/>')}" + elsif (body||"").length < 1 then + # Some transforms can yield empty content (handbook?part=9&chap=99) + # Consider this a 404 + raise Gorg::Status::NotFound + else + # Cache the output if all was OK + mstat, bodyZ = Cache.store(body, path_info, query, filelist, extrameta) + debug("Cached #{path_info}, mstat=#{mstat.inspect}") + # Check inm & ims again as they might match if another web node had + # previously delivered the same data + if notModified?(mstat, inm, ims) and extrameta.join !~ /set-cookie/i + raise Gorg::Status::NotModified.new(mstat) + end + end + else + if $Config["zipLevel"] > 0 then + bodyZ = body + body = nil + end + end + # If client accepts gzip encoding and we support it, return gzipped file + if bodyZ and $Config["zipLevel"] > 0 and ( cgi.accept_encoding =~ /gzip(\s*;\s*q=([0-9\.]+))?/ and ($2||"1") != "0" ) then + body = bodyZ + header['Content-Encoding'] = "gzip" + header['Vary'] = "Accept-Encoding" + else + unless body then + # We need to unzip bodyZ into body, i.e. we cached zipped data but client does not support gzip + body = gunzip(bodyZ) + end + end + # Add cookies to http header + cookies = makeCookies(extrameta) + if cookies then + header['cookie'] = cookies + end + # Add Content-Type to header + ct = contentType(extrameta) + if ct then + # Turn application/xhtml+xml into text/html if browser does not accept it + if cgi.accept !~ /application\/xhtml\+xml/ and ct =~ /application\/xhtml\+xml(.*)$/ then + header['type'] = "text/html#{$1}" + else + header['type'] = ct + end + else + header['type'] = 'text/plain' + end + end + # Add ETag & Last-Modified http headers + # NB: it's simply mstat(file.xml) when passthru=1 + if mstat then + header['ETag'] = makeETag(mstat) + header['Last-Modified'] = mstat.mtime.httpdate + end + end + cgi.out(header){body} + else # Not a HEAD or GET + raise Gorg::Status::NotAllowed + end + rescue => ex + if ex.respond_to?(:errCode) then + # One of ours (Gorg::Status::HTTPStatus) + cgi.out(ex.header){ex.html} + else + # Some ruby exceptions occurred, make it a 500 + syserr = Gorg::Status::SysError.new + cgi.out('Status'=>syserr.errSts){syserr.html(ex)} + error("do_CGI() failed: #{$!}") + end + end +end diff --git a/lib/gorg/fcgi-bin/gorg.fcgi b/lib/gorg/fcgi-bin/gorg.fcgi new file mode 100755 index 0000000..1f81cf2 --- /dev/null +++ b/lib/gorg/fcgi-bin/gorg.fcgi @@ -0,0 +1,61 @@ +#! /usr/bin/ruby + +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with gorg; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +require 'cgi' +require 'fcgi' + +# Overload read_from_cmdline to avoid crashing when request method +# is neither GET/HEAD/POST. 
Default behaviour is to read input from +# STDIN. Not really useful when your webserver gets OPTIONS / :-( +class CGI + module QueryExtension + def read_from_cmdline + '' + end + end +end + + +require 'gorg/cgi' + +include Gorg + +gorgInit +STDERR.close + +# Should I commit suicide after a while, life can be so boring! +ak47 = $Config["autoKill"]||0 + +countReq = 0; t0 = Time.new +# Process CGI requests sent by the fastCGI engine +FCGI.each_cgi do |cgi| + countReq += 1 + do_CGI(cgi) + # Is it time to leave? + # If maximum number of requests has been exceeded _AND_ at least 1 full minute has gone by + if ak47 > 0 && countReq >= ak47 && Time.new - t0 > 60 then + info("Autokill : #{countReq} requests have been processed in #{Time.new-t0} seconds") + Process.kill("USR1",$$) + else + # Garbage Collect regularly to help keep memory + # footprint low enough without costing too much time. + GC.start if countReq%50==0 + end +end diff --git a/lib/gorg/log.rb b/lib/gorg/log.rb new file mode 100644 index 0000000..4ef05d6 --- /dev/null +++ b/lib/gorg/log.rb @@ -0,0 +1,56 @@ +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with gorg; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +# Write logging info for our little gorg + +require 'syslog' +require 'webrick/log' + +module Gorg + # Make log functions available as if we were inside a log instance + # If no $Log global variable has been initialized, do nothing + def fatal(msg) $Log.fatal(msg) if $Log; end + def error(msg) $Log.error(msg) if $Log; end + def warn(msg) $Log.warn(msg) if $Log; end + def info(msg) $Log.info(msg) if $Log; end + def debug(msg) $Log.debug(msg) if $Log; end + + module Log + + class MyLog < WEBrick::BasicLog + # Interface to WEBrick log system + # Not much to add at this time ;-) + end + + class MySyslog + # Interface to syslog + def initialize(appname) + # Open syslog if not already done (only one open is allowed) + @@syslog = Syslog.open(appname) unless defined?(@@syslog) + # Make sure messages get through (WEBrick has its own filter) + @@syslog.mask = Syslog::LOG_UPTO(Syslog::LOG_ERR) + end + + def <<(str) + # WEBrick's logging requires the << method + # Just forward string to syslog + @@syslog.err(str) + end + end + end +end diff --git a/lib/gorg/search.rb b/lib/gorg/search.rb new file mode 100644 index 0000000..c90448a --- /dev/null +++ b/lib/gorg/search.rb @@ -0,0 +1,444 @@ +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. 
+# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with Foobar; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +require 'dbi' +require 'yaml' +require 'gorg/base' +require 'cgi' + +module GDig + class GFile + + def initialize(root, f, xlang) + @root = root + @fname = f + @xpath2lang = xlang + end + + def txt + unless @txt then + @txt, @lang = txtifyFile + end + @txt + end + + def lang + unless @lang then + @txt, @lang = txtifyFile + end + @lang + end + + private + + def txtifyFile + x=Gorg::XSL.new + x.xsl = <<EOXSL +<?xml version="1.0" encoding="UTF-8"?> + <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <xsl:output encoding="UTF-8" method="text" indent="no"/> + <xsl:template match="/"> +EOXSL + if (@xpath2lang||"").length > 1 then + x.xsl << <<EOXSL + <xsl:if test="#{@xpath2lang}"> + <xsl:value-of select="concat('%%LANG%%', #{@xpath2lang}, '%%
')"/> + </xsl:if> +EOXSL + end + x.xsl << <<EOXSL + <xsl:apply-templates/> + </xsl:template> + <xsl:template match="*"> + <xsl:apply-templates select="@*"/> + <xsl:apply-templates/> + </xsl:template> + <xsl:template match="@*"> + <xsl:value-of select="concat(' ',.,' ')"/> + </xsl:template> + </xsl:stylesheet> +EOXSL + x.xroot = @root + x.xml = @fname + x.process + + if x.xerr and x.xerr["xmlErrLevel"] >= 3 then + raise x.xerr["xmlErrMsg"] + end + + t = x.xres + if t =~ /^%%LANG%%([^%]+)%%/ then + l = $1 + t = $'.strip + else + l = nil + end + t << @fname + [t.squeeze("\n"), l] + end + end + + class DBFile + attr_reader :fid, :webname + def initialize(dbh, webname, localname) + @dbh = dbh + @webname = webname + @localname = localname + @row = @dbh.select_one("SELECT id,path,lang,timestamp,size FROM files where path = ?", webname) + if @row then + @fid = @row['id'] + else + @fid = nil + end + end + + def DBFile.remove(dbh, fid) + if fid then + dbh.do("delete from files where id=#{fid}") + end + end + + def uptodate? + if @fid then + unless @row then + @row = @dbh.select_one("SELECT id,path,lang,timestamp,size FROM files where id=#{@fid}") + end + if (fstat=File.stat(@localname)) and @row then + @row['timestamp']==fstat.mtime.to_s and @row['size']==fstat.size + else + false + end + end + end + + def update(blob, lang) + fstat=File.stat(@localname) + if @fid then + # update + sql = "update files set lang = ?, txt = ?, timestamp = ?, size = ? where id=#{@fid}" + @dbh.do(sql, lang, blob, fstat.mtime.to_s, fstat.size) + else + # insert new one + sql = "insert into files (path, lang, txt, timestamp, size) values (?, ?, ?, ?, ?)" + @dbh.do(sql, webname, lang, blob, fstat.mtime.to_s, fstat.size) + if id=@dbh.select_one("select last_insert_id()") then + @fid = id[0] + else + @fid = nil + end + end + end + end + + class GSearch + attr_reader :dbh, :searchTxt, :searchResult + include Gorg + + def initialize + @dbh = DBI.connect($Config['dbConnect'], $Config['dbUser'], $Config['dbPassword']) + @dbh['AutoCommit'] = true + end + + def indexDir + wipe = false + scanDir { |webName, localName| + begin + dbf = GDig::DBFile.new(@dbh, webName, localName) + unless dbf.uptodate? 
then + gf = GFile.new($Config['root'], webName, $Config['xlang']) + blob = gf.txt + lang = gf.lang + if (lang||"").length < 1 then + # No lang attribute, see if we can use the filename + if $Config['flang'] and $Config['flang'].match(webName) then + lang = $Config['flang'].match(webName)[1] + end + end + dbf.update(blob, lang) + wipe = true + debug "#{Time.new.to_i} #{webName} indexed" + end + rescue Exception => e + error "Failed to index #{webName} : #{e.to_s}" + end + } + wipeSearches if wipe + end + + def cleanup + # Remove files from db either because + # they should now be excluded or because they do not exist anymore + wipe = false + @dbh.select_all('select id, path from files') { |row| + if not fileMatch(row[1]) or not File.file?($Config['root']+row[1]) then + DBFile.remove(@dbh, row[0]) + debug "GDig::GSearch: #{row[1]} removed" + wipe = true + end + } + wipeSearches if wipe + end + + def do_CGI(cgi) + $Config["root"] = cgi.env_table['DOCUMENT_ROOT']||$Config["root"] + query = {} + # Get cookies + if $Config["acceptCookies"] then + # Add cookies to our params + query = cookies_to_params(cgi.cookies) + end + # Add URI params that are not used by search engine (p,q,l,s) + cgi.params.each{ |p, v| query[p] = v.to_s} + + # Choose language + if cgi.has_key?("l") then + lang = cgi["l"] + elsif query.has_key?("SL") then + lang = query["SL"] + else + lang = nil + end + + # Perform search + search(cgi["q"], lang) + + if cgi.has_key?("p") and cgi["p"] =~ /^[0-9]{1,5}$/ then + p = cgi["p"].to_i + else + p = 1 + end + + if cgi.has_key?("s") and cgi["s"] =~ /^[0-9]{2,3}$/ then + s = cgi["s"].to_i + elsif query.has_key?("PL") and query["PL"] =~ /^[0-9]{2,3}$/ then + s = query["PL"].to_i + else + s = 20 + end + s = 120 if s > 120 + + xml = xmlResult(p,s) + header = {}; body = "" + if cgi.has_key?("passthru") and $Config["passthru"] then + header = {'type' => 'text/plain'} + body = xml + else + if $Config["linkParam"] then + query[$Config["linkParam"]] = cgi.script_name + end + if $Config["httphost"] then + # Add HTTP_HOST to stylesheet params + query["httphost"] = if $Config["httphost"][0] == '*' then + cgi.host||"" + elsif $Config["httphost"].include?('*') then + $Config["httphost"][0] + elsif $Config["httphost"].include?(cgi.host) then + $Config["httphost"][0] + else + cgi.host + end + end + + err, body, filelist, extra = xproc(xml, query, false) + if err["xmlErrLevel"] > 0 then + raise "#{err.collect{|e|e.join(':')}.join('<br/>')}" + end + cookies = makeCookies(extra) + ct = setContentType(body) + # Turn application/xhtml+xml into text/html if browser does not accept it + if cgi.accept !~ /application\/xhtml\+xml/ and ct =~ /application\/xhtml\+xml(.*)$/ then + header = {'type' => "text/html#{$1}"} + else + header = {'type' => ct} + end + + # Add cookies to http header + if cookies then + header['cookie'] = cookies + end + end + # If client accepts gzip encoding and we support it, return gzipped file + if $Config["zipLevel"] > 0 and ( cgi.accept_encoding =~ /gzip(\s*;\s*q=([0-9\.]+))?/ and ($2||"1") != "0" ) then + body = gzip(body, $Config["zipLevel"]) + header['Content-Encoding'] = "gzip" + header['Vary'] = "Accept-Encoding" + end + cgi.out(header){body} + rescue => ex + syserr = Gorg::Status::SysError.new + cgi.out('Status'=>syserr.errSts){syserr.html(ex)} + error("GSearch::do_CGI() failed: #{$!}") + end + + def search(str, lang) + @searchTxt = str + @searchResult = nil + if (lang||"") == "" then + @searchLang = '%' + else + @searchLang = lang + end + if str =~ 
+
+    def search(str, lang)
+      @searchTxt = str
+      @searchResult = nil
+      if (lang||"") == "" then
+        @searchLang = '%'
+      else
+        @searchLang = lang
+      end
+      # Use MySQL boolean mode if the query contains boolean operators (+ - < > ( ) ~ *)
+      if str =~ /(^|\s)(([+<)(>~-][^+<)(>~-]+)|([^+<)(>~-]+\*))(\s|$)/ then
+        @searchBool = "Y"
+        boolClause = "in boolean mode"
+      else
+        @searchBool = "N"
+        boolClause = ""
+      end
+      if @searchTxt.length > 0 then
+        @searchResult = loadSearch
+        unless @searchResult then
+          @searchResult = []
+          # Perform full-text search
+          sql = <<EOSQL
+select id, path, lang, match (txt) against ( ? ) as score
+from files
+where lang like ? and match (txt) against ( ? #{boolClause} )
+order by score desc
+EOSQL
+          @dbh.select_all(sql, @searchTxt, @searchLang, @searchTxt).each { |r| @searchResult << [r[0],r[1],r[2],r[3]] }
+          saveSearch
+        end
+      end
+      @searchResult
+    end
+
+    def xmlResult(page=1, pageLength=25)
+      # Build an xml document like:
+      # <search page="p" pages="n">
+      #   <for>search string</for>
+      #   <found link="/path/to/file.xml" lang="fr">
+      #     blah blah <b>word2</b> bleh
+      #   </found>
+      # </search>
+      pageLength = 20 if pageLength < 1
+      xml = "<?xml version='1.0' encoding='UTF-8'?>\n\n"
+
+      if @searchResult and @searchResult.length >= 1 then
+        removeDeadFiles
+        nPages = @searchResult.length / pageLength
+        nPages += 1 unless 0 == @searchResult.length.modulo(pageLength)
+        page = nPages if page > nPages
+        page = 1 if page < 1
+
+        xml << "<search page='#{page}' pages='#{nPages}' pageLength='#{pageLength}' lang='#{xmlEscape(@searchLang)}' bool='#{@searchBool}'>\n"
+        xml << xmlSearchFor
+        @searchResult[(page-1)*pageLength..page*pageLength-1].each { |r|
+          xml << " <found link='#{r[1]}' lang='#{r[2]}' score='#{r[3]}'>\n"
+          xml << xmlBlobSample(r[0]) << "\n"
+          xml << " </found>\n"
+        }
+      else
+        xml << "<search page='0' pages='0'>\n"
+        xml << xmlSearchFor
+      end
+      xml << "</search>\n"
+    end
+
+    def scanDir
+      # Yield [web path, absolute path] for each file under the document root
+      # that matches the include/exclude patterns
+      Dir.chdir($Config['root']) {
+        `find -L . -type f`.split("\n").each{ |localFile|
+          if File.file?(localFile) then
+            webFile = localFile[1..-1]
+            if fileMatch(webFile) then
+              yield [webFile, File.expand_path(localFile)]
+            end
+          end
+        }
+      }
+    end
+
+    private
+
+    def xmlBlobSample(fileID)
+      # Extract a sample of text around the first matching word and mark the sought words up
+      blob = ""
+      r = @dbh.select_one("select txt from files where id = #{fileID}")
+      if r then
+        blob = r[0]
+        # Find first matching word and extract some text around it
+        stxt = @searchTxt.tr('`.,\'"\-_+~<>/?;:[]{}+|\\)(*&^%\$\#@!', ' ').split(' ')
+        regs = stxt.collect { |w| Regexp.new(w, true, 'U') }
+        ix = nil
+        regs.each { |r| break if ix=blob.index(r) }
+        if ix then
+          if ix < 80 then
+            x = 0
+          else
+            x = blob[0,ix-60].rindex(/[ ,\.]/)
+            x = 0 unless x
+          end
+          y = blob.index(/[,\. ]/, ix+80)
+          y = -1 unless y
+          blob = xmlEscape(blob[x..y])
+          # Mark up sought words
+          regs.each { |r| blob.gsub!(r){|t| "<b>#{t}</b>"} }
+        else
+          # No match found, return the first 120-odd characters up to a word boundary
+          # (searching from offset 120; the original sliced first, which returned
+          # an offset relative to position 120 and truncated far too early)
+          x = blob.index(/[ ,\.]/, 120) || -1
+          blob = xmlEscape(blob[0..x])
+        end
+      end
+      blob
+    end
+
+    def xmlEscape(str)
+      if str
+        str.gsub('&','&amp;').gsub('>','&gt;').gsub('<','&lt;')
+      else
+        "w00t"
+      end
+    end
+
+    def loadSearch
+      # Reuse a saved result set for an identical query, if any
+      if @searchTxt then
+        r = @dbh.select_one("select result from savedsearches where words = ? and lang = ? and bool = ?", @searchTxt, @searchLang, @searchBool)
+        if r then
+          YAML::load(r[0])
+        end
+      end
+    end
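+
+    # Editor's note with an illustrative schema sketch: the queries in this
+    # class assume MySQL tables roughly like the following. Table and column
+    # names come from the code above; the types and the DDL itself are
+    # assumptions, check the gorg setup instructions for the real statements.
+    #
+    #   create table files (
+    #     id        int auto_increment primary key,
+    #     path      varchar(255) not null,
+    #     lang      varchar(16),
+    #     txt       mediumtext,
+    #     timestamp varchar(32),
+    #     size      int,
+    #     fulltext (txt)              -- required by match...against
+    #   ) engine=MyISAM;              -- MyISAM fulltext, as was usual in 2004
+    #
+    #   create table savedsearches (
+    #     words  varchar(255),
+    #     lang   varchar(16),
+    #     bool   char(1),
+    #     result mediumtext           -- YAML dump of the result rows
+    #   );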
and bool = ?", @searchTxt, @searchLang, @searchBool) + @dbh.do("insert into savedsearches (words, lang, bool, result) values(?, ?, ?, ?)", @searchTxt, @searchLang, @searchBool, @searchResult.to_yaml) + end + end + + def wipeSearches + @dbh.do("delete from savedsearches") + end + + def fileMatch(f) + $Config['in/out'].each { |inout| + return inout[0] if inout[1].match(f) + } + false + end + + def removeDeadFiles + if @searchResult then + @searchResult.reject!{ |r| not File.file?($Config['root']+r[1]) } + end + end + + def xmlSearchFor + " <for>#{xmlEscape(@searchTxt)}</for>\n" if @searchTxt + end + + end + +end diff --git a/lib/gorg/www.rb b/lib/gorg/www.rb new file mode 100644 index 0000000..eb0c8fa --- /dev/null +++ b/lib/gorg/www.rb @@ -0,0 +1,207 @@ +### Copyright 2004, Xavier Neys (neysx@gentoo.org) +# # +# # This file is part of gorg. +# # +# # gorg is free software; you can redistribute it and/or modify +# # it under the terms of the GNU General Public License as published by +# # the Free Software Foundation; either version 2 of the License, or +# # (at your option) any later version. +# # +# # gorg is distributed in the hope that it will be useful, +# # but WITHOUT ANY WARRANTY; without even the implied warranty of +# # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# # GNU General Public License for more details. +# # +# # You should have received a copy of the GNU General Public License +# # along with gorg; if not, write to the Free Software +### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +# Run the stand-alone webserver and serve gentoo.org + +require 'gorg/base' +require 'webrick' +require 'cgi' + +class GentooServlet < WEBrick::HTTPServlet::FileHandler + include Gorg + + def do_GET(req, res) + hit = "#{$Config["root"]}#{req.path}" + cacheName = req.path + if FileTest.directory?(hit) and FileTest.exist?(hit+"/index.xml") then + # Use $URI/index.xml for directories that have an index.xml file + hit << "/index.xml" + cacheName << "/index.xml" + end + hit.squeeze!('/') + cacheName.squeeze!('/') + if FileTest.directory?(hit) then + super # Use default FileHandler for directories that have no index.xml + else + if hit !~ /\.(xml)|(rdf)|(rss)$/ then + super # Use default FileHandler if not an xml file + else + if not FileTest.exist?(hit) then + super # Use default FileHandler to handle 404 (file does not exist) + else + # Parse If-None-Match and If-Modified-Since request header fields if any + ims=inm=nil + begin + ims = Time.parse(req['if-modified-since']) if req['if-modified-since'] + inm = split_header_etags(req['if-none-match']) if req['if-none-match'] + rescue + # Just ignore ill-formated data + nil + end + begin + res['Charset'] = 'UTF-8' + # Process xml file or return xml file if passthru=1 + if $Config['passthru'] && req.query && req.query["passthru"] && req.query["passthru"] != "0" then + # passthru allowed by config and requested by visitor, return file as text/plain + mstat = File.stat(hit) + raise Gorg::Status::NotModified.new(mstat) if notModified?(mstat, inm, ims) + debug("Passthru granted for #{hit}") + body = IO.read(hit) + # If client accepts gzip encoding and we support it, return gzipped file + if $Config["zipLevel"] > 0 and (req.accept_encoding.include?("gzip") or req.accept_encoding.include?("x-gzip")) then + res.body = gzip(body, $Config["zipLevel"]) + res['Content-Encoding'] = "gzip" + res['Vary'] = "Accept-Encoding" + else + res.body = body + end + res['Content-Type'] = 'text/plain' + else + query_params = 
+          begin
+            res['Charset'] = 'UTF-8'
+            # Process the xml file, or return it as-is if passthru=1
+            if $Config['passthru'] && req.query && req.query["passthru"] && req.query["passthru"] != "0" then
+              # passthru allowed by config and requested by visitor, return file as text/plain
+              mstat = File.stat(hit)
+              raise Gorg::Status::NotModified.new(mstat) if notModified?(mstat, inm, ims)
+              debug("Passthru granted for #{hit}")
+              body = IO.read(hit)
+              # If the client accepts gzip encoding and we support it, return a gzipped body
+              if $Config["zipLevel"] > 0 and (req.accept_encoding.include?("gzip") or req.accept_encoding.include?("x-gzip")) then
+                res.body = gzip(body, $Config["zipLevel"])
+                res['Content-Encoding'] = "gzip"
+                res['Vary'] = "Accept-Encoding"
+              else
+                res.body = body
+              end
+              res['Content-Type'] = 'text/plain'
+            else
+              query_params = req.query.dup
+              # Get cookies and add them to the parameters
+              if $Config["acceptCookies"] then
+                # We need CGI::Cookie objects to be compatible with our cgi modules (stupid WEBrick)
+                ck = req.raw_header.find{|l| l =~ /^cookie: /i}
+                if ck then
+                  # $' holds whatever followed the "cookie: " match above
+                  query_params.merge!(cookies_to_params(CGI::Cookie.parse($'.strip)))
+                  debug "query params are " + query_params.inspect
+                end
+              end
+              if $Config["httphost"] then
+                # Add HTTP_HOST to stylesheet params
+                query_params["httphost"] = if $Config["httphost"][0] == '*' then
+                  req.host||""
+                elsif $Config["httphost"].include?('*') then
+                  $Config["httphost"][0]
+                elsif $Config["httphost"].include?(req.host) then
+                  $Config["httphost"][0]
+                else
+                  req.host||""
+                end
+              end
+
+              bodyZ = nil
+              body, mstat, extrameta = Gorg::Cache.hit(cacheName, query_params, inm, ims)
+              if body.nil? then
+                # Cache miss, process the file and cache the result
+                xml_query = query_params.dup
+                if $Config["linkParam"] then
+                  xml_query[$Config["linkParam"]] = req.path
+                end
+                err, body, filelist, extrameta = xproc(hit, xml_query, true)
+                warn("#{err.collect{|e|e.join(':')}.join('; ')}") if err["xmlErrLevel"] == 1
+                error("#{err.collect{|e|e.join(':')}.join('; ')}") if err["xmlErrLevel"] > 1
+                # Display error message if any, just like the cgi/fcgi versions
+                raise ("#{err.collect{|e|e.join(':')}.join('<br/>')}") if err["xmlErrLevel"] > 0
+                # Cache output
+                mstat, bodyZ = Gorg::Cache.store(body, cacheName, query_params, filelist, extrameta)
+              else
+                # Cache hit; the cache hands back gzipped data when compression is on
+                if $Config["zipLevel"] > 0 then
+                  bodyZ = body
+                  body = nil
+                end
+              end
+              # If the client accepts gzip encoding and we support it, return the gzipped data
+              if bodyZ and $Config["zipLevel"] > 0 and (req.accept_encoding.include?("gzip") or req.accept_encoding.include?("x-gzip")) then
+                res.body = bodyZ
+                res['Content-Encoding'] = "gzip"
+                res['Vary'] = "Accept-Encoding"
+              else
+                if body then
+                  res.body = body
+                else
+                  # We cached zipped data but the client does not accept gzip, unzip it on the fly
+                  res.body = gunzip(bodyZ)
+                end
+              end
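+              # Editor's sketch (not in the original): the cache contract used
+              # above, as inferred from this call site; see lib/gorg/cache.rb
+              # for the authoritative behaviour.
+              #
+              #   body, mstat, meta = Gorg::Cache.hit(name, params, inm, ims)
+              #   # body  : cached output (gzipped when zipLevel > 0), nil on a miss
+              #   # mstat : a File::Stat-like object used for ETag/Last-Modified
+              #   mstat, bodyZ = Gorg::Cache.store(body, name, params, filelist, meta)
+              #   # filelist ties the entry to every file xproc read, so a
+              #   # changed source invalidates the cached result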
+              # Add cookies to http header
+              cookies = makeCookies(extrameta)
+              if cookies then
+                cookies.each{|c| res.cookies << c.to_s}
+              end
+              # Add Content-Type to header
+              ct = contentType(extrameta).split(';')[0]
+              if ct then
+                # Turn application/xhtml+xml into text/html if the browser does not accept it
+                if req.accept.to_s !~ /application\/xhtml\+xml/ and ct =~ /application\/xhtml\+xml(.*)$/ then
+                  res['Content-Type'] = "text/html#{$1}"
+                else
+                  res['Content-Type'] = ct
+                end
+              else
+                res['Content-Type'] = 'text/plain'
+              end
+            end
+            if mstat then
+              res['ETag'] = makeETag(mstat)
+              res['Last-Modified'] = mstat.mtime.httpdate
+            end
+          rescue => ex
+            if ex.respond_to?(:errCode) then
+              # One of ours (Gorg::Status::HTTPStatus)
+              res.body = ex.html
+              res.status = ex.errCode
+              ex.header.each {|k,v| res[k]=v unless k =~ /status|cookie/i}
+            else
+              # Some ruby exception occurred, turn it into a syserr
+              syserr = Gorg::Status::SysError.new
+              res.body = syserr.html(ex)
+              res.status = syserr.errCode
+            end
+          end
+        end
+      end
+    end
+  end
+end
+
+###
+#|# Start Here
+###
+
+def www
+  # Log accesses to either stderr, syslog or a file
+  if $Config["accessLog"] == "syslog"
+    # Use syslog, with our own format based on the default but without timestamp
+    access_log = [ [ @syslog, "HIT %h \"%r\" %s %b" ] ]
+    STDERR.close
+  elsif $Config["accessLog"] == "stderr"
+    # Log to stderr, with the same timestamp-less format
+    access_log = [ [ STDERR, "HIT %h \"%r\" %s %b" ] ]
+  else
+    # Open the file and use it; if it's not writable, tough!
+    access_log_stream = File.open($Config["accessLog"], "a")
+    access_log = [ [ access_log_stream, WEBrick::AccessLog::COMBINED_LOG_FORMAT ] ]
+    STDERR.close
+  end
+
+  s = WEBrick::HTTPServer.new( :BindAddress => $Config["listen"], :AccessLog=>access_log, :Logger => $Log, :Port => $Config["port"], :CGIPathEnv => ENV["GORG_CONF"])
+
+  # Mount extra directories first, then the GentooServlet on /
+  $Config["mounts"].each { |m|
+    s.mount(m[0], WEBrick::HTTPServlet::FileHandler, m[1])
+  }
+  s.mount("/", GentooServlet, $Config["root"])
+
+  # Start server, shut down cleanly on Ctrl-C
+  trap("INT"){ s.shutdown }
+
+  puts "\n\nStarting the Gorg web server on #{$Config['listen']}:#{$Config['port']}\n\nHit Ctrl-C or type \"kill #{$$}\" to stop it\n\n"
+
+  s.start
+end
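+
+# Editor's sketch (not part of the original file): a minimal $Config for
+# running the stand-alone server. The keys are the ones the code above reads;
+# every value is a placeholder, and $Log must also be a WEBrick-compatible
+# logger before www is called.
+#
+#   $Config = {
+#     "root"      => "/var/www/localhost/htdocs",
+#     "listen"    => "127.0.0.1",
+#     "port"      => 8008,
+#     "accessLog" => "stderr",
+#     "zipLevel"  => 2,      # 0 disables gzip support
+#     "mounts"    => [],     # extra [web path, local path] pairs
+#   }
+#   www                      # blocks until Ctrl-C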