# Samizdat RDF storage
#
#   Copyright (c) 2002-2008  Dmitry Borodaenko <angdraug@debian.org>
#
#   This program is free software.
#   You can distribute/modify this program under the terms of
#   the GNU General Public License version 2 or later.
#
# see doc/rdf-storage.txt for introduction and Samizdat Squish definition;
# see doc/storage-impl.txt for explanation of implemented algorithms
#
# vim: et sw=2 sts=2 ts=8 tw=0

require 'samizdat/cache'

module Samizdat

# exception raised for syntax errors in Squish statements
class ProgrammingError < RuntimeError
end

# provides access to RDF storage via DBI-like interface
#
class RDF

  # set up access to the storage
  #
  # _db_ is the database handle all queries are sent to; _map_ is a hash with
  # two entries: 'ns' (namespace prefix => namespace URI) and 'map'
  # (property name => mapping); a namespace prefix at the start of a property
  # name ("prefix::") is replaced with the namespace URI once, here
  #
  def initialize(db, map)
    @db = db
    @ns = map['ns']

    @map = {}
    map['map'].each_pair do |property, mapping|
      expanded = property.sub(/\A(\S+?)::/) { @ns[$1] }
      @map[expanded] = mapping
    end

    # translated queries are cached by their source string, see #select
    @select_cache = Cache.new(nil, 1000)
  end

  # cached internal property map with expanded namespaces
  attr_reader :map

  # namespaces hash
  attr_reader :ns

  # reference to the DBI database
  attr_reader :db

  # look up the value of _property_ of _subject_ with a one-answer Squish
  # query, return the first field of the answer (nil when there is none)
  #
  def get_property(subject, property)
    answer = select_one(
      "\nSELECT ?object WHERE (#{property} #{subject} ?object)")
    object, = answer
    object
  end

  # translate the query with #select and fetch a single answer
  # (similar to DBI#select_one)
  #
  def select_one(query, params={})
    sql_and_values = select(query, params)
    @db.select_one(*sql_and_values)
  end

  # translate the query with #select and fetch all answers (similar to
  # DBI#select_all)
  #
  # _limit_ and _offset_, when given, are appended to the SQL as LIMIT and
  # OFFSET clauses; an optional block is passed on to the database call
  #
  def select_all(query, limit=nil, offset=nil, params={}, &p)
    translated, *values = select(query, params)

    sql = translated.dup   # append to a copy, not to the translated string
    sql << "\nLIMIT #{limit}" if limit
    sql << "\nOFFSET #{offset}" if offset

    # a nil block argument is the same as passing no block at all
    @db.select_all(sql, *values, &p)
  end

  # translate a query to SQL, returns what SquishQuery#to_sql returns
  #
  # a String is parsed into a SquishQuery through the select cache (keyed by
  # the string), a SquishQuery is used as is, anything else raises
  # ProgrammingError
  #
  def select(query, params={})
    if query.kind_of?(String)
      query = @select_cache.fetch_or_add(query) { SquishQuery.new(query, @ns) }
    end

    unless query.kind_of?(SquishQuery)
      raise ProgrammingError, "String or SquishQuery expected"
    end

    query.to_sql(params)
  end

  # merge Squish query into RDF database
  #
  # _query_ is parsed with SquishQuery.new and must be of type :assert,
  # otherwise ProgrammingError is raised; _params_ supplies the values for
  # parametrized (:name) values used in the query
  #
  # returns list of new ids assigned to blank nodes listed in INSERT section
  #
  # always run it inside transaction, with AutoCommit disabled
  #
  def assert(query, params={})
    q = SquishQuery.new(query, @ns)
    raise ProgrammingError, 'Wrong query type: assert expected' unless
      q.type == :assert
    # INSERT section node list and UPDATE section assignments
    insert, update = q.nodes

    sm = q.sql_mapper

    # Stage 1: Resources
    v = {}   # v: node -> value
    new = {} # new[node] if node was inserted at this stage
    sm.nodes.each do |node, n|

      if node =~ SquishQuery::INTERNAL   # internal resource
        v[node] = $1   # resource id

      elsif node =~ SquishQuery::PARAMETER or node =~ SquishQuery::LITERAL
        v[node] = node   # pass parametrized value or string literal as is

      elsif node =~ SquishQuery::BN
        # first position where this blank node is the subject of a clause
        subject_position = n[:positions].collect {|p|
          :subject == p[:role] ? p : nil
        }.compact.first

        if subject_position.nil?   # blank node occurring only in object position
          v[node] = update[node]   # todo: what if update[node].nil?

        else   # resource blank node
          # nodes not listed in INSERT section are looked up first, using
          # the part of the pattern reachable from the node
          unless insert.include?(node)
            s = SquishQuery.new(
              :type => :select,
              :nodes => [node],
              :pattern => q.subgraph(node),
              :strings => q.strings
            )
            v[node], = select_one(s, params)
          end

          # not found (or listed in INSERT section): create a new resource,
          # labelled with the name of the table it is mapped to
          if v[node].nil?
            db.do "INSERT INTO Resource (label) VALUES (?)",
              sm.clauses[ subject_position[:clause] ][:subject][:table]
            # relies on the enclosing transaction to get the id just inserted
            v[node], = db.select_one "SELECT MAX(id) FROM Resource"
            new[node] = true
          end
        end

      else   # external resource
        v[node], = db.select_one "SELECT id FROM Resource
          WHERE literal = 'false' AND uriref = 'true' AND label = ?", node

        if v[node].nil?
          db.do "INSERT INTO Resource (uriref, label) VALUES ('true', ?)", node
          v[node], = db.select_one "SELECT MAX(id) FROM Resource"
          new[node] = true
        end
      end
    end

    # Stage 2: Properties
    a = {}   # a: alias -> positions*
    sm.clauses.each_with_index do |clause, i|
      a[ clause[:alias] ] ||= []
      a[ clause[:alias] ].push(clause)
    end

    # one INSERT or UPDATE statement per table alias
    a.each do |alias_, clauses|
      subject = clauses.first[:subject]
      key_node = subject[:node]
      table = subject[:table]

      # [field, value] pairs to store: all fields of a new record, or only
      # the fields assigned in UPDATE section for an existing one
      data = []
      clauses.each do |clause|
        node = clause[:object][:node]
        if new[key_node] or update[node]
          data.push([
            clause[:object][:field],
            q.substitute_literals(v[node])   # value
          ])
          # todo: prove that value is not nil
        end
      end

      if new[key_node]
        data.unshift [ 'id', v[key_node] ]
        # when id is inserted, insert_resource() trigger does nothing
        sql = "INSERT INTO #{table} ("+
          data.collect {|field, value| field }.join(', ')+') VALUES ('+
          data.collect {|field, value| value }.join(', ')+')'

      elsif data.length > 0
        sql = "UPDATE #{table} SET "+
          data.collect {|field, value| field+' = '+value }.join(', ')+
          " WHERE id = #{v[key_node]}"
      end

      # sql stays nil when there is nothing to store for this alias
      if sql
        # :name parameters are replaced with '?' marks and bound values
        sql, values = SquishQuery.substitute_parameters(sql, params)
        db.do(sql, *values)
      end
    end

    return insert.collect {|node| v[node] }
  end
end


require 'uri/common'

# parse Squish query and translate triples to relational conditions
#
# provides access to internal representation of the parsed query and utility
# functions to deal with Squish syntax
#
class SquishQuery
  # regexp for internal resource reference (a string of digits, captured as
  # the resource id)
  INTERNAL = Regexp.new(/\A([[:digit:]]+)\z/).freeze

  # regexp for blank node mark and name
  BN = Regexp.new(/\A\?([[:alnum:]_]+)\z/).freeze

  # regexp for scanning blank nodes inside a string
  BN_SCAN = Regexp.new(/\?[[:alnum:]_]+?\b/).freeze

  # regexp for parametrized value
  PARAMETER = Regexp.new(/\A:([[:alnum:]_]+)\z/).freeze

  # regexp for replaced string literal ('n' placeholder, see
  # #substitute_literals)
  LITERAL = Regexp.new(/\A'\d+'\z/).freeze

  # regexp for number
  NUMBER = Regexp.new(/\A-?[[:digit:]]+(\.[[:digit:]]+)?\z/).freeze

  # regexp for operator
  OPERATOR = Regexp.new(/\A(\+|-|\*|\/|<|<=|>|>=|=|I?LIKE|NOT|AND|OR|IS|NULL)\z/i).freeze

  # regexp for aggregate function
  AGGREGATE = Regexp.new(/\A(avg|count|max|min|sum)\z/i).freeze

  # regexp for the overall structure of a Squish statement; captures, in
  # order: starting keyword, nodes section, WHERE pattern, OPTIONAL pattern,
  # LITERAL expression, GROUP BY expression, ORDER BY expression, order
  # direction, and USING (namespaces) section
  QUERY = Regexp.new(/\A\s*(SELECT|INSERT|UPDATE)\b\s*(.*?)\s*
        \bWHERE\b\s*(.*?)\s*
        (?:\bOPTIONAL\b\s*(.*?))?\s*
        (?:\bLITERAL\b\s*(.*?))?\s*
        (?:\bGROUP\s+BY\b\s*(.*?))?\s*
        (?:\bORDER\s+BY\b\s*(.*?)\s*(ASC|DESC)?)?\s*
        (?:\bUSING\b\s*(.*?))?\s*\z/mix).freeze

  # parse a Squish query
  #
  # _query_ is either a String with a Squish statement, or a Hash describing
  # a pre-parsed query (used by RDF#assert) with keys :type, :nodes,
  # :pattern, :optional and :strings
  #
  # for a String: string literals are replaced with 'n' placeholders, common
  # query sections are extracted, namespace prefixes in the patterns are
  # expanded using the USING section (or _default_ns_ when it is absent or
  # says PRESET NS), SQL expressions are validated, the pattern is handed to
  # SqlMapper, and the nodes section is parsed according to the query type
  #
  # raises ProgrammingError when the query is nil, of a wrong class, or
  # malformed
  #
  def initialize(query, default_ns = {})
    query.nil? and raise ProgrammingError, "SquishQuery: query can't be nil"
    if query.kind_of? Hash  # pre-parsed query for RDF#assert
      @type = query[:type]
      @nodes = query[:nodes]
      @pattern = query[:pattern]
      @optional = query[:optional]
      @strings = query[:strings]
      @literal = @group = @order = ''
      @sql_mapper = SqlMapper.new(@pattern)
      return self
    elsif not query.kind_of? String
      raise ProgrammingError,
        "Bad query initialization parameter class: #{query.class}"
    end

    @query = query   # keep original string
    query = query.dup

    # replace string literals with 'n' placeholders (also see #substitute_literals)
    @strings = []
    query.gsub!(/'(''|[^'])*'/m) do
      @strings.push $&
      "'" + (@strings.size - 1).to_s + "'"
    end

    match = QUERY.match(query) or raise ProgrammingError,
      "Malformed query: are keywords SELECT, INSERT, UPDATE or WHERE missing?"
    match, @key, @nodes, @pattern, @optional, @literal,
      @group, @order, @order_dir, @ns = match.to_a.collect {|m| m.to_s }
    match = nil
    @key.upcase!
    @order_dir.upcase!

    # namespaces
    # todo: validate ns
    @ns = (@ns.empty? or /\APRESET\s+NS\z/ =~ @ns) ? default_ns :
      Hash[*@ns.gsub(/\b(FOR|AS|AND)\b/i, '').scan(/\S+/)]
    @pattern = parse_pattern(@pattern)
    @optional = parse_pattern(@optional)

    # validate SQL expressions
    validate_expression(@literal)
    @group.split(/\s*,\s*/).each {|group| validate_expression(group) }
    validate_expression(@order)

    @sql_mapper = SqlMapper.new(
      @pattern, @optional, @literal)

    # check that all variables can be bound
    @variables = query.scan(BN_SCAN)
    @variables.each {|node| @sql_mapper.bind(node) }

    # determine query type, parse and validate nodes section
    if 'SELECT' == @key
      @type = :select
      @nodes = @nodes.split(/\s*,\s*/).collect {|node|
        validate_expression(node)
        node
      }
    else
      @type = :assert
      if 'UPDATE' == @key
        insert = ''
        update = @nodes
      elsif 'INSERT' == @key and @nodes =~ /\A\s*(.*?)\s*(?:\bUPDATE\b\s*(.*?))?\s*\z/
        insert, update = $1, $2.to_s
      else
        raise ProgrammingError,
          "Query doesn't start with one of SELECT, INSERT, or UPDATE"
      end

      insert = insert.split(/\s*,\s*/).each {|s|
        raise ProgrammingError, "Blank node expected in INSERT section instead of '#{s}'" unless s =~ BN
      }

      if update.empty?
        # always hand out a Hash: an empty String is true in Ruby, so
        # "update or {}" would never fall back to the Hash
        update = {}
      else
        assignments = update.split(/\s*,\s*/).collect {|s|
          s.split(/\s*=\s*/)
        }
        assignments.each {|node, value|
          raise ProgrammingError, "Blank node expected on the left side of UPDATE assignment instead of '#{node}'" unless node =~ BN
          # an item without '=' has no value to validate
          value.nil? and raise ProgrammingError,
            "Value expected on the right side of UPDATE assignment for '#{node}'"
          validate_expression(value)
        }
        update = Hash[*assignments.flatten]
      end

      @nodes = [insert, update]
    end
    # todo: don't bind list

    return self
  end

  # string literals extracted from the query, indexed by the number used in
  # their 'n' placeholder
  attr_reader :strings

  # starting keyword, SELECT, INSERT or UPDATE (not set for pre-parsed queries)
  attr_reader :key

  # type of query, :select or :assert
  attr_reader :type

  # blank variables control section: list of result expressions for :select,
  # pair of INSERT node list and UPDATE assignments for :assert
  attr_reader :nodes

  # query pattern graph as array of triples [ [p, s, o], ... ]
  attr_reader :pattern

  # literal SQL expression
  attr_reader :literal

  # SQL GROUP BY expression
  attr_reader :group

  # SQL order expression
  attr_reader :order

  # direction of order, ASC or DESC
  attr_reader :order_dir

  # query namespaces mapping
  attr_reader :ns

  # list of variables defined in the query
  attr_reader :variables

  # SqlMapper object holding transformed query pattern
  attr_reader :sql_mapper

  # returns original string passed in for parsing (nil when the query was
  # created from a pre-parsed Hash, as no string is kept in that case)
  #
  def to_s
    @query
  end

  # put the original string literals back in place of their 'n' placeholders
  # (see #new, #LITERAL); a placeholder with no matching literal is kept, and
  # anything that is not a String is returned untouched
  #
  def substitute_literals(s)
    if s.kind_of? String
      s.gsub(/'(\d+)'/) {|placeholder| @strings[$1.to_i] or placeholder }
    else
      s
    end
  end

  # replace :name parameters in SQL query with '?' marks, in place
  #
  # returns the query and the array of parameter values (looked up in
  # _params_ by Symbol) in order of appearance; raises ProgrammingError when
  # a parameter has no value in _params_
  #
  def SquishQuery.substitute_parameters(sql, params={})
    values = []

    sql.gsub!(/\B:([[:alnum:]_]+)/) do   # see #PARAMETER
      key = $1.to_sym
      unless params.has_key?(key)
        raise ProgrammingError,
          "Missing value for :#{key} in parametrized query"
      end
      values << params[key]
      '?'
    end

    [sql, values]
  end

  # replace schema uri with namespace prefix, in place
  #
  # _uriref_ is changed to "prefix::name" when it consists of _uri_ followed
  # by a name without '/' and '#'; returns nil when it doesn't (as gsub!
  # does when nothing is replaced)
  #
  def SquishQuery.uri_shrink!(uriref, prefix, uri)
    # a namespace URI is plain text, not a pattern: characters like '.', '?'
    # and '+' in it must not be treated as regexp operators
    base = uri.kind_of?(Regexp) ? uri : Regexp.escape(uri)
    uriref.gsub!(/\A#{base}([^\/#]+)\z/) {"#{prefix}::#{$1}"}
  end

  # return a copy of _uriref_ with schema uri replaced by the first matching
  # prefix from the supplied namespaces hash (an unchanged copy if none match)
  #
  def SquishQuery.ns_shrink(uriref, namespaces)
    shrunk = uriref.dup
    namespaces.each do |prefix, uri|
      break if SquishQuery.uri_shrink!(shrunk, prefix, uri)
    end
    shrunk
  end

  # same as SquishQuery.ns_shrink, using the namespaces of this query
  #
  def ns_shrink(uriref)
    namespaces = @ns
    SquishQuery.ns_shrink(uriref, namespaces)
  end

  # validate expression
  #
  # expression := value [ operator expression ]
  #
  # value := blank_node | literal_string | number | '(' expression ')'
  #
  # whitespace between tokens (except inside parentheses) is mandatory
  #
  # raises ProgrammingError on the first token that is not a blank node,
  # parameter, literal placeholder, number, operator or aggregate function
  #
  def validate_expression(string)
    # todo: lexical analyser
    string.split(/[\s()]+/).each do |token|
      # split leaves an empty leading field when the expression starts with
      # a parenthesis or whitespace; it is not a token
      next if token.empty?

      case token
      when BN, PARAMETER, LITERAL, NUMBER, OPERATOR, AGGREGATE
      else
        raise ProgrammingError, "Bad token '#{token}' in expression"
      end
    end
  end

  # translate Squish SELECT query to SQL,
  # return SQL query followed by parameter values in proper order
  #
  # raises ProgrammingError when the query is not a SELECT, or when a '?'
  # is left in the SQL after blank nodes are replaced with their bindings
  #
  def to_sql(params={})
    unless @type == :select
      raise ProgrammingError, "Wrong query type: select expected"
    end

    conditions = @sql_mapper.where

    # one section per line, optional sections only when they are not empty
    sections = [
      "SELECT #{@nodes.join(', ')}",
      "FROM #{@sql_mapper.from}"
    ]
    sections.push("WHERE #{conditions}") unless conditions.empty?
    sections.push("GROUP BY #{@group}") unless @group.empty?
    sections.push("ORDER BY #{@order} #{@order_dir}") unless @order.empty?
    sql = sections.join("\n")

    # replace blank node names with bindings
    sql.gsub!(BN_SCAN) {|node| @sql_mapper.bind(node) }
    if sql =~ /\?/
      raise ProgrammingError,
        "Unexpected '?' in translated query (probably, caused by unmapped blank node): #{sql.gsub(/\s+/, ' ')};"
    end

    sql, values = SquishQuery.substitute_parameters(sql, params)
    [substitute_literals(sql), *values]
  end

  # collect the triples of the query pattern that are reachable from _node_,
  # following triples from subject to object
  #
  # fixme: make it work with optional sub-patterns
  #
  def subgraph(node)
    reachable = [node]
    collected = []

    # keep sweeping the pattern until a sweep adds nothing new
    loop do
      grown = false

      @pattern.each do |triple|
        next unless reachable.include?(triple[1])
        next if collected.include?(triple)

        reachable.push triple[2]
        collected.push triple
        grown = true
      end

      break unless grown
    end

    collected
  end

  private

  # regexp for a single triple of the pattern; captures predicate, subject,
  # object, and an optional FILTER expression
  PATTERN_SCAN = Regexp.new(/\((\S+)\s+(\S+)\s+(.*?)(?:\s+\bFILTER\b\s*(.*?)\s*)?\)/).freeze

  # parse query pattern graph out of a string, expand URI namespaces
  #
  # returns array of [predicate, subject, object, filter] (filter is nil
  # when the triple has none); raises ProgrammingError for a parenthesized
  # group that is not a triple, for an undefined namespace prefix, and for
  # a bad token in a filter expression
  #
  def parse_pattern(pattern)
    pattern.scan(/\(.*?\)/).collect do |c|
      # without this check a malformed triple would surface as a
      # NoMethodError on nil further down
      match = PATTERN_SCAN.match(c) or raise ProgrammingError,
        "Malformed triple '#{c}' in query pattern"
      predicate, subject, object, filter = match.captures

      [predicate, subject, object].each do |u|
        u.sub!(/\A(\S+?)::/) do
          @ns[$1] or raise ProgrammingError, "Undefined namespace prefix #{$1}"
        end
      end

      validate_expression(filter.to_s)

      [predicate, subject, object, filter]
    end
  end
end


# binding of a query node to a field of an aliased table; bindings with the
# same alias and field are equal and interchangeable as Hash keys
#
class SqlNodeBinding
  attr_reader :alias, :field

  def initialize(table_alias, field)
    @alias, @field = table_alias, field
  end

  # "alias.field", as used in SQL
  #
  def to_s
    @alias + '.' + @field
  end

  def eql?(other)
    @alias == other.alias and @field == other.field
  end

  alias_method :==, :eql?

  # consistent with eql?: derived from the same two values
  #
  def hash
    to_s.hash
  end
end


# SQL expression as a list of parts: strings, node bindings, and nested
# expressions; behaves as an Array of its parts
#
class SqlExpression < DelegateClass(Array)
  def initialize(*parts)
    super parts
  end

  # parts separated by spaces, wrapped in parentheses
  #
  def to_s
    # convert every part explicitly: Array#join would treat a nested
    # expression as an array (it responds to to_ary) and join its parts
    # without the parentheses
    '(' + self.collect {|part| part.to_s }.join(' ') + ')'
  end

  # yield every part that is not an expression itself, descending into
  # nested expressions
  #
  def traverse(&block)
    self.each do |part|
      case part
      when SqlExpression
        part.traverse(&block)
      else
        yield part
      end
    end
  end

  # replace, in place, every binding that is a key of the _rebind_ hash with
  # its value, descending into nested expressions; the replaced binding is
  # passed to the optional block
  #
  def rebind!(rebind, &block)
    self.each_with_index do |part, i|
      case part
      when SqlExpression
        part.rebind!(rebind, &block)
      when SqlNodeBinding
        if rebind[part]
          self[i] = rebind[part]
          yield part if block_given?
        end
      end
    end
  end
end


# transform Squish query pattern graph to SQL join
#
class SqlMapper
  # transform the query pattern into FROM and WHERE clauses of an SQL query
  #
  # _pattern_ and _optional_ are arrays of [predicate, subject, object,
  # filter] triples (main and optional part of the query pattern),
  # _global_filter_ is an SQL expression that is and-ed to the conditions
  # (note that it may be changed in place when bindings of its nodes are
  # wrapped into a subquery)
  #
  # raises ProgrammingError when the pattern is empty or is a disjoint graph
  #
  def initialize(pattern, optional = [], global_filter = '')
    # no clause means nothing to select from; report it as a problem of the
    # query rather than fail inside the graph check
    pattern.empty? and raise ProgrammingError,
      "Query pattern is empty"

    (check_graph(pattern) and check_graph(pattern + optional)) or raise ProgrammingError,
      "Query pattern is a disjoint graph"

    map_predicates(pattern, optional)
    transform(global_filter.scan(SquishQuery::BN_SCAN))
    generate_tables_and_conditions(global_filter)

    # drop intermediate data that is only needed during the transformation
    @jc = @aliases = @ac = nil
  end

  # map clause position to table, field, and table alias
  #
  #  position => {
  #    :subject => {
  #      :node => node,
  #      :field => field,
  #      :table => table
  #    },
  #    :object => {
  #      :node => node,
  #      :field => field,
  #      :table => table
  #    },
  #    :optional => < true | false >,
  #    :filter => expression,   # only for clauses with a FILTER
  #    :alias => alias
  #  }
  #
  attr_reader :clauses

  # map node to list of positions in clauses, and bindings
  #
  #  node => {
  #    :positions => [
  #      { :clause => position, :role => < :subject | :object > }
  #    ]
  #    :bindings => [ binding1, ... ]
  #  }
  #
  attr_reader :nodes

  # list of tables for FROM clause of SQL query
  attr_reader :from

  # conditions for WHERE clause of SQL query
  attr_reader :where

  # return the first binding of the node, raise ProgrammingError if the node
  # is unknown or has no bindings
  #
  def bind(node)
    mapping = @nodes[node]
    bindings = mapping && mapping[:bindings]
    bound = bindings && bindings.first

    bound or raise ProgrammingError,
      "Blank node '#{node}' is not bound by the query pattern"
  end

  private

  # check whether pattern is not a disjoint graph (all nodes are
  # undirectionally reachable from one node)
  #
  # returns false for an empty pattern
  #
  def check_graph(pattern)
    return false if pattern.empty?

    # all nodes, objects as well as subjects: two subjects may be connected
    # only through an object they share
    nodes = pattern.collect {|predicate, subject, object|
      [subject, object]
    }.flatten.uniq

    seen = [ nodes.shift ]
    found_more = true

    # move nodes connected to a seen node from nodes to seen until nothing
    # moves any more
    while found_more and not nodes.empty?
      found_more = false

      pattern.each do |predicate, subject, object|

        if seen.include?(subject) and nodes.include?(object)
          seen.push(object)
          nodes.delete(object)
          found_more = true

        elsif seen.include?(object) and nodes.include?(subject)
          seen.push(subject)
          nodes.delete(subject)
          found_more = true
        end
      end
    end

    return nodes.empty?
  end

  # Stage 1: Predicate Mapping (storage-impl.txt)
  #
  # starts with empty @clauses and @nodes, maps the main pattern, then the
  # optional one, and finally settles ambiguous mappings
  #
  def map_predicates(pattern, optional)
    @clauses = []
    @nodes = {}

    [ [pattern, false], [optional, true] ].each do |graph, is_optional|
      map_pattern(graph, is_optional)
    end

    refine_ambiguous_properties
  end

  # map every triple of the pattern to a clause with all possible table and
  # field mappings of its predicate, record positions of nodes in @nodes
  #
  # a triple whose predicate has no mapping (or whose subject can't be
  # mapped) is replaced with three triples describing a reified statement,
  # which are mapped in turn
  #
  # raises ProgrammingError for invalid predicates and nodes
  #
  # NOTE(review): rdf is not defined in this class, presumably it is an
  # accessor to the RDF storage object provided elsewhere -- confirm
  #
  def map_pattern(pattern, optional = false)
    pattern = pattern.dup   # we will modify the pattern on the fly

    pattern.each do |predicate, subject, object, filter|

      # validate the triple
      predicate =~ URI::URI_REF or raise ProgrammingError,
        "Valid uriref expected in predicate position instead of '#{predicate}'"

      [subject, object].each do |node|
        node =~ SquishQuery::INTERNAL or
          node =~ SquishQuery::BN or
          node =~ URI::URI_REF or
          raise ProgrammingError,
            "Resource or blank node name expected instead of '#{node}'"
      end

      # list of possible mappings into internal tables
      predicate_map = rdf.map[predicate]

      if predicate_map and
        (subject =~ SquishQuery::BN or
         subject =~ SquishQuery::INTERNAL or
         subject =~ SquishQuery::PARAMETER or
         predicate_map['Resource'])
        # internal predicate and subject is mappable to Resource table

        i = clauses.size

        @clauses[i] = {
          :subject => predicate_map.keys.collect {|table|
            { :node => subject, :table => table, :field => 'id' }
          },
          :object  => predicate_map.keys.collect {|table|
            { :node => object, :table => table, :field => predicate_map[table] }
          },
          :optional => optional
        }
        @clauses[i][:filter] = SqlExpression.new(filter) if filter

        # reverse mapping of the node occurrences
        @nodes[subject] ||= { :positions => [] }
        @nodes[subject][:positions].push( { :clause => i, :role => :subject } )
        @nodes[object] ||= { :positions => [] }
        @nodes[object][:positions].push( { :clause => i, :role => :object } )

      else
        # assume reification for unmapped predicates:
        #
        #            | (rdf::predicate ?_stmt_#{n} p)
        # (p s o) -> | (rdf::subject ?_stmt_#{n} s)
        #            | (rdf::object ?_stmt_#{n} o)
        #
        # every reified triple needs a statement node of its own, so number
        # them with a counter that lives as long as the mapper (the clause
        # index can't be used, it is not set in this branch)
        @reified_count = @reified_count.to_i + 1

        rdf_ns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        stmt = "?_stmt_#{@reified_count}"
        pattern.push [rdf_ns + 'predicate', stmt, predicate],
                     [rdf_ns + 'subject', stmt, subject],
                     [rdf_ns + 'object', stmt, object]
      end
    end
  end

  # if a node can be mapped to more than one [table, field] pair, see if it can
  # be refined based on other occurrences of this node in other query clauses
  #
  # when done, every clause is left with exactly one mapping for its subject
  # and one for its object (the first of those that remain)
  #
  # NOTE(review): a position is only refined against later positions of the
  # same node, so the outcome depends on the order of clauses
  #
  def refine_ambiguous_properties
    @nodes.each_value do |n|
      map = n[:positions]

      map.each_with_index do |p, i|
        big = @clauses[ p[:clause] ][ p[:role] ]
        next if big.size <= 1   # no refining needed

        (i + 1).upto(map.size - 1) do |j|
          small_p = map[j]
          small = @clauses[ small_p[:clause] ][ small_p[:role] ]

          if small.size < big.size and (refined = big & small).size > 0

            # refine the node...
            @clauses[ p[:clause] ][ p[:role] ] = big = refined

            # ...and its pair: mappings are hashes, the other side of the
            # clause keeps only the mappings to the tables that remain
            tables = refined.collect {|mapping| mapping[:table] }
            @clauses[ p[:clause] ][ opposite_role(p[:role]) ].delete_if {|pair|
              not tables.include?(pair[:table])
            }
          end
        end
      end
    end

    # drop remaining ambiguous mappings
    # todo: split query for ambiguous mappings
    @clauses.each do |clause|
      next if clause.nil?   # means it was reified
      clause[:subject] = clause[:subject].first
      clause[:object] = clause[:object].first
    end
  end

  # :object for :subject, :subject for anything else
  #
  def opposite_role(role)
    if :subject == role
      :object
    else
      :subject
    end
  end

  # hand out the current value of the alias counter ('a', 'b', ...) as the
  # alias of _table_, register it in @aliases, and advance the counter
  #
  def next_alias(table)
    @ac ||= 'a'
    @aliases ||= {}

    assigned = @ac.dup
    @ac = @ac.next

    @aliases[assigned] = {
      :table => table,
      :join_type => :left,   # see update_alias_join_types_and_filters()
      :filter => []
    }

    assigned
  end

  # assign a table alias to every clause: clauses with the same subject node
  # mapped to the same table share one alias, a new alias is generated
  # otherwise
  #
  def define_relation_aliases
    @nodes.each_value do |n|
      positions = n[:positions]

      positions.each_with_index do |position, i|
        next if :object == position[:role]

        clause = @clauses[ position[:clause] ]
        next if clause[:alias]

        table = clause[:subject][:table]

        # see if we've already mapped this node to the same table before:
        # same node, same table -> same alias
        twin = positions[0, i].find do |earlier|
          other = @clauses[ earlier[:clause] ]
          other[:alias] and other[:subject][:table] == table
        end

        clause[:alias] =
          if twin
            @clauses[ twin[:clause] ][:alias]
          else
            next_alias(table)
          end
      end
    end   # optimize: unnecessary aliases are generated
  end

  # an alias is inner joined as soon as one non-optional clause mentions it;
  # filters of the clauses are collected in the filter list of their alias
  #
  def update_alias_join_types_and_filters
    @clauses.each do |clause|
      entry = @aliases[ clause[:alias] ]

      entry[:join_type] = :inner unless clause[:optional]
      entry[:filter].push(clause[:filter]) if clause[:filter]
    end
  end

  # Stage 2: Relation Aliases and Join Conditions (storage-impl.txt)
  #
  # result is map of aliases in @aliases and list of join conditions in @jc
  #
  # also records the bindings of every node in @nodes; blank nodes listed in
  # _ground_nodes_ are not forced to be NOT NULL when they are used only once
  #
  def transform(ground_nodes = [])
    define_relation_aliases
    update_alias_join_types_and_filters

    # [ [ binding1, binding2 ], ... ]
    @jc = []

    @nodes.each do |node, n|
      map = n[:positions]

      # node binding: alias and field of the first position of the node
      first = map.first
      clause = @clauses[ first[:clause] ]
      a = clause[:alias]
      binding = SqlNodeBinding.new(a, clause[ first[:role] ][:field])
      n[:bindings] = [ binding ]

      # set when the node is tied to something: another binding or a value
      ground = false

      # join conditions: every other distinct binding of the node is joined
      # to the first one
      1.upto(map.size - 1) do |i|
        p = map[i]
        clause2 = @clauses[ p[:clause] ]
        binding2 = SqlNodeBinding.new(clause2[:alias], clause2[ p[:role] ][:field])

        unless n[:bindings].include?(binding2)
          n[:bindings].push(binding2)
          @jc.push([binding, binding2])
          ground = true
        end
      end

      # ground non-blank nodes
      if node !~ SquishQuery::BN

        if node =~ SquishQuery::INTERNAL   # internal resource id
          @aliases[a][:filter].push SqlExpression.new(binding, '=', $1)

        elsif node =~ SquishQuery::PARAMETER or node =~ SquishQuery::LITERAL
          @aliases[a][:filter].push SqlExpression.new(binding, '=', node)

        else   # external resource uriref

          r = nil
          map.each do |p|
            c = @clauses[ p[:clause] ]
            if 'Resource' == c[:subject][:table]
              r = c[:alias]   # reuse existing mapping to Resource table
              break
            end
          end

          # no clause of the node is mapped to Resource table: join one more
          # alias of it, with the same join type as the alias of the node
          if r.nil?
            r = next_alias('Resource')
            @aliases[r][:join_type] = @aliases[a][:join_type]
            @jc.push([ binding, SqlNodeBinding.new(r, 'id') ])
          end

          # look the resource up by its uriref
          @aliases[r][:filter].push SqlExpression.new(
            SqlNodeBinding.new(r, 'literal'), '=', "'false'", 'AND',
            SqlNodeBinding.new(r, 'uriref'), '=', "'true'", 'AND',
            SqlNodeBinding.new(r, 'label'), '=', %{'#{node}'})
        end

        ground = true
      end

      # ground dangling blank nodes to existential quantifier
      unless ground or ground_nodes.include?(node)
        # optimize: check if n[:binding] can be NULL
        @aliases[a][:filter].push SqlExpression.new(binding, 'IS NOT NULL')
      end
    end
  end

  # produce SQL FROM and WHERE clauses from results of transform()
  #
  # inner joined aliases make up the main query; every group of connected
  # left joined aliases is wrapped into a subquery that is left joined to
  # the main query; results are stored in @from and @where
  #
  # _global_filter_ is and-ed to the conditions of the main query; blank
  # nodes in it that end up inside a subquery are replaced, in place, with
  # the corresponding fields of the subquery
  #
  # raises RuntimeError when there are no aliases for the main query, or a
  # subquery can't be joined to it
  #
  def generate_tables_and_conditions(global_filter)
    main_path, seen = jc_subgraph_path(:inner)

    main_path and not main_path.empty? or raise RuntimeError,
      'Failed to find table aliases for main query'

    subqueries = ''
    subquery_count = 'a'

    loop do
      subquery_path, new = jc_subgraph_path(:left, seen)
      break if subquery_path.nil? or subquery_path.empty?

      # left join the subquery to the main query
      subquery_alias = '_subquery_' << subquery_count
      subquery_count.next!

      field_count = 'a'

      rebind = {}         # binding inside the subquery -> field of the subquery
      select_nodes = {}   # bindings the subquery has to select

      @nodes.each do |node, n|
        n[:bindings].each do |b|
          if new[b.alias] and not rebind[b]
            # bindings for nodes that have been wrapped into the subquery
            # have to be re-bound for use in the main query

            field = '_field_' << field_count
            field_count.next!

            rebind[b] = SqlNodeBinding.new(subquery_alias, field)
          end
        end

        if r = rebind[ n[:bindings].first ]   # update the global filter
          global_filter.gsub!(/#{Regexp.escape(node)}\b/) do
            select_nodes[ n[:bindings].first ] = true
            r.to_s
          end
        end
      end

      # update bindings in the main query
      main_path.each do |a,|
        @aliases[a][:filter].each do |f|
          f.rebind!(rebind) do |b|
            select_nodes[b] = true
          end
        end
      end

      left_join_on = []

      # find all conditions that join this subquery with main query
      @jc.each do |jc|
        new_index = []
        0.upto(1) {|i| new_index.push(i) if new[ jc[i].alias ] }
        next unless 1 == new_index.size   # looking for "one new, one seen" case

        new_index = new_index.first
        new_b = jc[new_index]
        seen_b = jc[1 - new_index]

        left_join_on.push SqlExpression.new(seen_b, '=', rebind[new_b])
        select_nodes[new_b] = true
      end

      left_join_on.empty? and raise RuntimeError,
        "Failed to join subquery #{subquery_alias} to the main query"

      select_nodes = select_nodes.keys.collect {|b|
        b.to_s << ' AS ' << rebind[b].field
      }.join(', ')

      tables, conditions = jc_path_to_tables_and_conditions(subquery_path)

      subqueries << "\nLEFT JOIN (\nSELECT #{select_nodes}\nFROM #{tables}"
      subqueries << "\nWHERE " << conditions unless conditions.empty?
      # conditions are converted to strings one by one: Array#join would
      # treat an expression as an array (it responds to to_ary) and put the
      # separator between its parts
      subqueries << "\n) AS #{subquery_alias} ON " <<
        left_join_on.collect {|on| on.to_s }.join(' AND ')
    end

    @from, @where = jc_path_to_tables_and_conditions(main_path)
    @from << subqueries

    # global filter is and-ed to the RDF pattern's conditions
    if @where.empty?
      @where = global_filter
    elsif not global_filter.empty?
      @where = %{(#{@where}) AND (#{global_filter})}
    end
  end

  # produces a subgraph path through join conditions linking all aliases with
  # given type that form a single graph and weren't processed yet:
  #
  #  path = [ [start, []], [ next, [ jc, ... ] ], ... ]
  #
  # updates _seen_ hash for all aliases included in the path
  #
  # returns the path together with a hash of the aliases included in it, or
  # nil when no unseen alias of the given type is left
  #
  def jc_subgraph_path(join_type, seen = {})
    start = find_alias(join_type, seen)
    return nil if start.nil?

    new = { start => true }
    seen[start] = true
    path = [ [start, []] ]

    loop do   # while we can find more connecting joins of the given type
      join_alias = nil

      # all join conditions are inspected, the last candidate found is taken
      @jc.each do |first, second|
        [ [first.alias, second.alias],
          [second.alias, first.alias] ].each do |known, candidate|

          if seen[known] and (not seen[candidate]) and
            @aliases[candidate][:join_type] == join_type

            join_alias = candidate
            break
          end
        end
      end
      break if join_alias.nil?

      # join it to all seen aliases
      join_on = @jc.select do |b1, b2|
        (seen[b1.alias] and b2.alias == join_alias) or
          (seen[b2.alias] and b1.alias == join_alias)
      end

      seen[join_alias] = true
      new[join_alias] = true
      path.push([join_alias, join_on])
    end

    [ path, new ]
  end

  # first alias of the given join type that is not marked in _seen_, or nil
  #
  def find_alias(join_type, seen = {})
    candidate = @aliases.find do |name, info|
      not seen[name] and info[:join_type] == join_type
    end

    candidate and candidate.first
  end

  # generate FROM and WHERE clauses from a path generated by jc_subgraph_path()
  #
  # the first alias of the path starts the list of tables, and its filters
  # become the conditions; every other alias is inner joined on its join
  # conditions and its own filters
  #
  # returns [ tables, conditions ], both strings; _path_ is not modified
  #
  def jc_path_to_tables_and_conditions(path)
    path = path.dup

    first, = path.shift
    a = @aliases[first]

    tables = a[:table] + ' AS ' + first
    conditions = a[:filter]

    path.each do |join_alias, join_on|
      a = @aliases[join_alias]

      # filters are converted to strings one by one: Array#join would treat
      # an expression as an array (it responds to to_ary) and put the
      # separator between its parts
      tables <<
        %{\nINNER JOIN #{a[:table]} AS #{join_alias} ON } <<
        (
          join_on.collect {|b1, b2| b1.to_s + ' = ' + b2.to_s } +
          a[:filter].collect {|f| f.to_s }
        ).join(' AND ')
    end

    [ tables, conditions.collect {|c| c.to_s }.join("\nAND ") ]
  end
end

end   # module Samizdat
