=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: modules shared by the different db types, dbi, postgresql, sqlite

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

  Summary of GPL 2

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation; either version 2 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
    http://www.fsf.org/licenses/gpl.html
    http://www.gnu.org/copyleft/gpl.html
    http://www.jus.uio.no/sisu/gpl2.fsf

  SiSU was first released to the public on January 4th 2005

  SiSU uses:

  *  Standard SiSU markup syntax,
  *  Standard SiSU meta-markup syntax, and the
  *  Standard SiSU object citation numbering and system

  © Ralph Amissah 1997, current 2007.
  All Rights Reserved.

 * Ralph Amissah: ralph@amissah.com
                  ralph.amissah@gmail.com
=end
module SiSU_DB_import
  require "#{SiSU_lib}/db_columns"
  require "#{SiSU_lib}/db_load_tuple"
  require "#{SiSU_lib}/shared_html_lite"
  class Import < SiSU_DB_columns::Column_size
    include SiSU_Param
    include SiSU_Screen
    @@dl=nil
    attr_accessor :tp
    # Prepares an importer for one source document.
    #
    # opt      - options object; this method reads opt.fns (file name) and
    #            opt.cmd (command flags).
    # conn     - database handle; must respond to execute(sql) { |sth| ... }.
    # sql_type - database flavour, matched later against /sqlite/ and /pg/.
    #
    # Besides instance state, this resets the class-level segment trackers
    # (@@seg, @@seg_full) and seeds the row counters from the current
    # MAX(lid) of documents and MAX(nid) of endnotes.
    def initialize(opt,conn='',sql_type='pg')
      @opt=opt
      @conn=conn
      @sql_type=sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @env=SiSU_Env::Info_env.new(@opt.fns)
      @dal="#{@env.path.dal}"
      if !@opt.fns.empty? && !@opt.cmd.empty?
        @md=SiSU_Param::Parameters.new(@opt).get
        @fnb=@md.fnb
      else
        @fnb=''
      end
      # NOTE(review): the pattern has no capture group, so requesting group 1
      # looks like it always yields nil - confirm what suffix was intended.
      @suffix=@opt.fns[/(?:.+?)\.[_-]?sst/,1]
      @fnm="#@dal/#{@opt.fns}.meta.rbm"
      @@seg=''
      # seg_full is meant to carry the level 5 & 6 segment name (e.g. 7.3 or
      # 7.3.1) where seg only carries the level 4 name (e.g. 7).
      @@seg_full=''
      @col=Hash.new('')
      @col[:ocn]=''
      @counter={}
      # Highest id currently stored in a table, as an Integer (nil when the
      # handle returns nothing).
      current_max=lambda do |query|
        @conn.execute( query ) { |sth| sth.fetch_all.to_s.to_i }
      end
      @col[:lid]=current_max.call('SELECT MAX(lid) FROM documents') || 0
      @id_n=current_max.call('SELECT MAX(nid) FROM endnotes') || 0
      # Per-level heading counters, assigned from lv6 down to lv1.
      [:lv6,:lv5,:lv4,:lv3,:lv2,:lv1].each { |level| @col[level]=0 }
      @db=SiSU_Env::Info_db.new
      # Digest length is looked up once and shared by all instances.
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Loads the document's DAL representation and imports it into the database.
    #
    # For sqlite the three import steps run unconditionally and are followed by
    # a commit on the handle.  For every other sql_type the metadata table is
    # first queried for the file name: when no row comes back the import runs
    # between explicit BEGIN/COMMIT statements, otherwise a notice is printed
    # and nothing is imported.
    def marshal_load
      require "#{SiSU_lib}/dal"
      @dal_array=SiSU_DAL::Source.new(@opt).get                  # dal file drawn here
      banner=SiSU_Screen::Ansi.new(@opt.cmd,"#{@db.db_psql}::#{@opt.fns}")
      banner.puts_blue unless @opt.cmd =~/q/
      progress=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnm)
      progress.print_grey if @opt.cmd =~/v/
      # The three import steps shared by both database flavours.
      populate=lambda do
        import_db_metadata(@dal_array)
        import_documents(@dal_array)
        import_db_urls(@dal_array,@fnm)                                     #import OID on/off
      end
      case @sql_type
      when /sqlite/                                                              #fix logic for sqlite !
        populate.call
        @conn.commit                                                             #sqlite watch
      else
        # NOTE(review): the file name is interpolated unescaped into a regex
        # match (~), so names that merely contain or pattern-match it would
        # also count as existing - confirm whether equality was intended.
        known=@conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; })
        if known
          @db=SiSU_Env::Info_db.new
          puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.db_psql}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}"
        else
          @conn.execute('BEGIN')
          populate.call
          @conn.execute('COMMIT')
        end
      end
    end
    # Rewrites +string+ in place so it can sit inside a single-quoted SQL
    # literal, and reduces some SiSU markup along the way.
    #
    # Rules run in the order listed: single quotes are doubled, <:br> becomes
    # an html line break, code/alt/group/verse and name markers are removed,
    # image links become "[image: file] caption" and {text}http://... links
    # are reduced to their text.
    #
    # Returns the result of the last substitution: the string when the link
    # rule matched, nil otherwise.
    def special_character_escape(string)
      [
        [/'/,"''"],
        [/<:br>/,"<br />\n"],
        [/<:(?:code|alt|group|verse)(?:-end)?>/,''],
        [/<:name#\S+?>/,''],
        [/\{\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)\}\S+/,'[image: \1] \2'],
        [/\{\s*(.+?)\s*\}http:\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2'],
      ].map { |pattern,replacement| string.gsub!(pattern,replacement) }.last
    end
    # Placeholder: performs no escaping and leaves +string+ untouched.
    # A candidate rule is kept here, commented out.
    # Always returns nil.
    def unicode_special_character_escape(string)
      #string.gsub!(/(["';:,])/, %{\\\\\\1})
      nil
    end
    # Reduces +string+ in place to searchable plain text.
    # (define rules, make same as in dal clean)
    #
    # Rules run in the order listed, then surrounding whitespace is trimmed.
    # Returns what strip! returns: the string when trimming changed it,
    # nil otherwise.
    def strip_markup(string)
      rules=[
        [/<sup>(\d+)<\/sup>/,'[\1]'],                                            # endnote marks -> [n]
        [/<:i[12]>/,''],                                                         # indent markers
        [/(?:&nbsp\\;)+/,' '],
        [/<!T[h]?¡.+?!>/,"[TABLE]\n"],                                           #tables
        [/<!¡¡\d+(.+?)!>/,'\1'],                                                 #tables
        [/¡¡\d+¡/,' '],                                                          #tables
        [/¡/,' '],                                                               #tables tidy later
        [/<.+?>/,''],                                                            # any remaining tag
        [/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|ftp)\\\:\S+ /,' [image] '],       # else image names found in search
        [/\s\s+/,' '],
      ]
      rules.each { |pattern,replacement| string.gsub!(pattern,replacement) }
      string.strip!
    end
                                                                                 #% import into database tables
    # Collects the document's header metadata into @tp and inserts one row
    # into the metadata table.
    #
    # dbi_unit - accepted for symmetry with the other import_* methods; it is
    #            not read here.
    #
    # For every metadata value that is present three entries are stored in @tp:
    #   @tp[:key]   the value (SQL-escaped for text fields),
    #   @tp[:key_f] the column fragment   "column, ",
    #   @tp[:key_i] the value fragment    "'value', ".
    # Absent values leave no entries, so their fragments interpolate as empty
    # strings in the INSERT statement below.
    #
    # Text values are escaped on a copy, so the strings held by @md are left
    # as they were read.
    #
    # FIXME: the metadata row id (@@id_t) is only advanced when the document
    # has a title; a header without a title leaves @@id_t unset or stale
    # before it is used by the INSERT.
    #
    # NOTE(review): within this method :comments, :cls_loc, :cls_dewey,
    # :cls_pg, :cls_isbn, :long_title, :wc_words, :dgst, :sc_number, :sc_date
    # and :generated are collected but the INSERT reads :comment_*, :loc_*,
    # :dewey_*, :isbn_*, :pg_* (and :suffix_*, :copyright_*, :owner_*), none of
    # which are set here - confirm against the table schema whether those
    # columns are meant to be populated.
    def import_db_metadata(dbi_unit)                                             #% import documents - populate database
      print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } unless @opt.cmd =~/q/
      @tp={}
      @md=SiSU_Param::Parameters.new(@opt).get
      # Stores one metadata value together with its column/value fragments;
      # does nothing when the value is absent.
      tp_add=lambda do |key,column,value,escape|
        next unless value
        if escape
          value=value.dup                                                        # do not rewrite @md's own string
          special_character_escape(value)
        end
        @tp[key]=value
        @tp["#{key}_f".to_sym],@tp["#{key}_i".to_sym]="#{column}, ","'#{value}', "
      end
      if @md.title
        tp_add.call(:title,'title',@md.title,true)
        id_t=@conn.execute( 'SELECT MAX(tid) FROM metadata' ) { |x| x.fetch_all.to_s.to_i }
        @@id_t=id_t if id_t
        @@id_t ||=0
        @@id_t+=1                                                                # next free metadata row id, once per file
        puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@cX.off}} unless @opt.cmd =~/q/
      end
      @tp[:long_title]=@md.dc_title if @md.dc_title
      # Text fields: [@tp key, @md accessor, optional sub-key of the accessor's
      # result].  The column name equals the key unless listed in column_for.
      text_fields=[
        [:subtitle,:subtitle],
        [:creator,:dc_creator],
        [:contributor,:dc_contributor],
        [:translator,:translator],
        [:illustrator,:illustrator],
        [:publisher,:dc_publisher],
        [:prepared_by,:prepared_by],
        [:digitized_by,:digitized_by],
        [:subject,:dc_subject],
        [:description,:dc_description],
        [:abstract,:abstract],
        [:type,:dc_type],
        [:rights,:dc_rights],
        [:date,:dc_date],
        [:date_created,:dc_date_created],
        [:date_issued,:dc_date_issued],
        [:date_available,:dc_date_available],
        [:date_modified,:dc_date_modified],
        [:date_valid,:dc_date_valid],
        [:language,:dc_language,:name],
        [:language_original,:language_original,:name],
        [:format,:dc_format],
        [:identifier,:dc_identifier],
        [:source,:dc_source],
        [:relation,:dc_relation],
        [:coverage,:dc_coverage],
        [:keywords,:keywords],
        [:comments,:comments],
        [:cls_loc,:cls_loc],
        [:cls_dewey,:cls_dewey],
        [:cls_pg,:cls_pg],
        [:cls_isbn,:cls_isbn],
        [:prefix_a,:prefix_a],
        [:prefix_b,:prefix_b],
        [:fns,:fns],
      ]
      column_for={:fns=>'filename'}
      text_fields.each do |key,accessor,subkey|
        value=@md.send(accessor)
        value=value[subkey] if subkey
        tp_add.call(key,column_for.fetch(key,key.to_s),value,true)
      end
      # Generated values, stored without escaping.
      [:wc_words,:dgst,:sc_number,:sc_date,:generated].each do |key|
        tp_add.call(key,key.to_s,@md.send(key),false)
      end
      SiSU_DB::Test.new(self,@opt).verify                                             #% import title names, filenames (tuple)
      @conn.execute(%{
        INSERT INTO metadata (#{@tp[:fns_f]} #{@tp[:suffix_f]} #{@tp[:title_f]} #{@tp[:subtitle_f]} #{@tp[:creator_f]} #{@tp[:illustrator_f]} #{@tp[:translator_f]} #{@tp[:subject_f]} #{@tp[:description_f]} #{@tp[:publisher_f]} #{@tp[:contributor_f]} #{@tp[:prepared_by_f]} #{@tp[:digitized_by_f]} #{@tp[:date_f]} #{@tp[:date_created_f]} #{@tp[:date_issued_f]} #{@tp[:date_valid_f]} #{@tp[:date_available_f]} #{@tp[:date_modified_f]} #{@tp[:type_f]} #{@tp[:format_f]} #{@tp[:identifier_f]} #{@tp[:source_f]} #{@tp[:language_f]} #{@tp[:language_original_f]} #{@tp[:relation_f]} #{@tp[:coverage_f]} #{@tp[:rights_f]} #{@tp[:copyright_f]} #{@tp[:owner_f]} #{@tp[:keywords_f]} #{@tp[:abstract_f]} #{@tp[:comment_f]} #{@tp[:loc_f]} #{@tp[:dewey_f]} #{@tp[:isbn_f]} #{@tp[:pg_f]} #{@tp[:prefix_a_f]} #{@tp[:prefix_b_f]} tid) VALUES (#{@tp[:fns_i]} #{@tp[:suffix_i]} #{@tp[:title_i]} #{@tp[:subtitle_i]} #{@tp[:creator_i]} #{@tp[:illustrator_i]} #{@tp[:translator_i]} #{@tp[:subject_i]} #{@tp[:description_i]} #{@tp[:publisher_i]} #{@tp[:contributor_i]} #{@tp[:prepared_by_i]} #{@tp[:digitized_by_i]} #{@tp[:date_i]} #{@tp[:date_created_i]} #{@tp[:date_issued_i]} #{@tp[:date_valid_i]} #{@tp[:date_available_i]} #{@tp[:date_modified_i]} #{@tp[:type_i]} #{@tp[:format_i]} #{@tp[:identifier_i]} #{@tp[:source_i]} #{@tp[:language_i]} #{@tp[:language_original_i]} #{@tp[:relation_i]} #{@tp[:coverage_i]} #{@tp[:rights_i]} #{@tp[:copyright_i]} #{@tp[:owner_i]} #{@tp[:keywords_i]} #{@tp[:abstract_i]} #{@tp[:comment_i]} #{@tp[:loc_i]} #{@tp[:dewey_i]} #{@tp[:isbn_i]} #{@tp[:pg_i]} #{@tp[:prefix_a_i]} #{@tp[:prefix_b_i]} #{@@id_t});
      })
    end
    def import_documents(dbi_unit)                                                     #% import documents - populate main database table
                                                                                 #% import into substantive database tables (tuple)
      begin
        @col[:tid]=@@id_t
        @en,@en_ast,@en_pls=[],[],[]
        @col[:en_a]=nil
        @col[:en_z]=nil
        dbi_unit.each do |data|
          #data.gsub!(/<[biu]>(.+?)<\/[biu]>/,'\1')                    # remove bold, italics, underscore
          data.gsub!(/<b>(.+?)<\/b>/,'\1')                             # remove bold, italics, underscore
          data.gsub!(/<i>(.+?)<\/i>/,'\1')                             # remove bold, italics, underscore
          data.gsub!(/<u>(.+?)<\/u>/,'\1')                             # remove bold, italics, underscore
          #data.gsub!(/<:name#\S+?>/,'')
          @col[:seg]=@@seg
          if data =~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m                                                    # regular text
            notedata=data.dup
            if data[/^([123])~\s+(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/]
              @col[:lev],txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6,$7
              @col[:lid]+=1
              if txt =~/~\{.+?\}~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en << txt.scan(/~\{(\d+).+?\}~/)
                txt.gsub!(/~\{(\d+).+?\}~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\*.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_ast << txt.scan(/~\[[*](\d+).+?\]~/)
                txt.gsub!(/~\[([*]\d+).+?\]~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\+.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_pls << txt.scan(/~\[[+](\d+).+?\]~/)
                txt.gsub!(/~\[([+]\d+).+?\]~/,'<sup>\1</sup>')
              end
              @col[:body]=SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:lev]).lev4_minus
              special_character_escape(@col[:body])
              @col[:plaintext]=@col[:body].dup
              strip_markup(@col[:plaintext])
              if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
              end
              if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
              end
              if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
              end
              t=SiSU_DB_tuple::Load_tuple.new(@conn,@col,@opt)
              t.tuple
              case @col[:lev]
              when /1/; @col[:lv1]+=1
              when /2/; @col[:lv2]+=1
              when /3/; @col[:lv3]+=1
              end
              @col[:lev]=@col[:plaintext]=@col[:body]=''
            elsif data[/^4~(.+?)\s+(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/]
              @@seg,txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6,$7
              @col[:seg]=@@seg
              @col[:lv4]+=1
              @col[:lid]+=1
              @col[:lev]=4
              if txt =~ /~\{.+?\}~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en << txt.scan(/~\{(\d+).+?\}~/)
                txt.gsub!(/~\{(\d+).+?\}~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\*.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_ast << txt.scan(/~\[[*](\d+).+?\]~/)
                txt.gsub!(/~\[([*]\d+).+?\]~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\+.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_pls << txt.scan(/~\[[+](\d+).+?\]~/)
                txt.gsub!(/~\[([+]\d+).+?\]~/,'<sup>\1</sup>')
              end
              @col[:body]=SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:lev],@col[:seg]).lev4_plus
              special_character_escape(@col[:body])
              @col[:plaintext]=@col[:body].dup
              strip_markup(@col[:plaintext])
              if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
              end
              if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
              end
              if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
              end
              t=SiSU_DB_tuple::Load_tuple.new(@conn,@col,@opt)
              t.tuple
              @col[:lev]=@col[:plaintext]=@col[:body]=''
            elsif data[/^5~(?:~\S+)?(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/]                             # header lev5 seg level
              txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6
              re=/^5~(.+?)\s+/
              @@seg_full=re.match(data)[1] if data=~re #create?
              @@seg ||='' #nil # watch
              @col[:seg]=@@seg
              @col[:lv5]+=1
              @col[:lid]+=1
              @col[:lev]=5
              if txt =~ /~\{.+?\}~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en << txt.scan(/~\{(\d+).+?\}~/)
                txt.gsub!(/~\{(\d+).+?\}~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\*.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_ast << txt.scan(/~\[[*](\d+).+?\]~/)
                txt.gsub!(/~\[([*]\d+).+?\]~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\+.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_pls << txt.scan(/~\[[+](\d+).+?\]~/)
                txt.gsub!(/~\[([+]\d+).+?\]~/,'<sup>\1</sup>')
              end
              @col[:body]=SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:lev],@col[:seg]).lev4_plus
              special_character_escape(@col[:body])
              @col[:plaintext]=@col[:body].dup
              strip_markup(@col[:plaintext])
              if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
              end
              if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
              end
              if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
              end
              t=SiSU_DB_tuple::Load_tuple.new(@conn,@col,@opt)
              t.tuple
              @col[:lev]=@col[:plaintext]=@col[:body]=''
            elsif data[/^6~(?:~\S+)?(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/] # header lev6 seg level
              txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6
              re=/^6~(.+?)\s+/
              @@seg_full=re.match(data)[1] if data=~re #create?
              @@seg ||='' #nil # watch
              @col[:seg]=@@seg
              @col[:lv6]+=1
              @col[:lid]+=1
              @col[:lev]=6
              if txt =~ /~\{.+?\}~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en << txt.scan(/~\{(\d+).+?\}~/)
                txt.gsub!(/~\{(\d+).+?\}~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\*.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_ast << txt.scan(/~\[[*](\d+).+?\]~/)
                txt.gsub!(/~\[([*]\d+).+?\]~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\+.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_pls << txt.scan(/~\[[+](\d+).+?\]~/)
                txt.gsub!(/~\[([+]\d+).+?\]~/,'<sup>\1</sup>')
              end
              @col[:body]=SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:lev],@col[:seg]).lev4_plus
              special_character_escape(@col[:body])
              @col[:plaintext]=@col[:body].dup
              strip_markup(@col[:plaintext])
              if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
              end
              if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
              end
              if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
              end
              t=SiSU_DB_tuple::Load_tuple.new(@conn,@col,@opt)
              t.tuple
              @col[:lev]=@col[:plaintext]=@col[:body]=''
            else                                                                 #% regular text
              @col[:lid]+=1
              txt=''
              txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=(/(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/m).match(data).captures
              if txt =~ /~\{.+?\}~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en << txt.scan(/~\{(\d+).+?\}~/)
                txt.gsub!(/~\{(\d+).+?\}~/,'<sup>\1</sup>')
                #txt.gsub!(/~\{(\d+).+?\}~/,'^[\1]') # remove endnote, keep endnote reference number, display as, e.g. [^1]
              end
              if txt =~/~\[\*.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_ast << txt.scan(/~\[[*](\d+).+?\]~/)
                txt.gsub!(/~\[([*]\d+).+?\]~/,'<sup>\1</sup>')
              end
              if txt =~/~\[\+.+?\]~/
                word_mode=txt.scan(/\S+/)
                endnote_range(word_mode)
                @en_pls << txt.scan(/~\[[+](\d+).+?\]~/)
                txt.gsub!(/~\[([+]\d+).+?\]~/,'<sup>\1</sup>')
              end
              if @sql_type=~/pg/ and txt.size > (document_clean - 1)             #% examine pg build & remove limitation
                puts "\n\nTOO LARGE (TXT - see error log)\n\n"
                open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
                  error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}")
                end
                txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
              end
              if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
              end
              if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
              end
              if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
              end
              @col[:body]=if txt=~/<!T[h]?¡.+?!~\d+;\w\d+;\w\d+>/ #watch
                SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns]).html_table
              elsif txt=~/<:i1>/
                SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns]).indent1
              elsif txt=~/<:i2>/
                SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns]).indent2
              else
                SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns]).norm
              end
              special_character_escape(@col[:body])
              @col[:plaintext]=@col[:body].dup
              strip_markup(@col[:plaintext])
              t=SiSU_DB_tuple::Load_tuple.new(@conn,@col,@opt)
              t.tuple
              @en,@en_ast,@en_pls=[],[],[]
              @col[:en_a]=@col[:en_z]=nil
              @col[:lev]=@col[:plaintext]=@col[:body]=''
            end
            if notedata =~ /~\{.+?\}~/                                           #% import into database endnotes tables
              endnote_array=notedata.scan(/~\{.+?\}~/)
              endnote_array.each do |inf|
                if inf[/~\{\d+.+?<[0-9a-f]{#{@@dl}}>\}~/]                                       # dal new endnotes 2003w31/1
                  if inf[/~\{(\d+)(.+?)<([0-9a-f]{#{@@dl}})>\}~/]                                       # dal new endnotes 2003w31/1
                    en,txt,digest_clean=$1,$2,$3
                  end
                  @id_n+=1
                  body=SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns],en).endnote
                  special_character_escape(body)
                  special_character_escape(txt)
                  strip_markup(txt)
                  if txt.size > (endnote_clean - 1)
                    puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
                    open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
                      error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
                    end
                    txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
                  end
                  if txt
                    #puts "'#{@id_n}', '#{@col[:lid]}', '#{en}', '#{txt}', '#{body}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@@id_t}'" #% endnotes
                    @conn.execute(%{
                      INSERT INTO endnotes (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) VALUES ('#{@id_n}', '#{@col[:lid]}', '#{en}', '#{txt}', '#{body}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@@id_t}', '#{digest_clean}');
                    })
                  end
                end
              end
              word_mode=notedata.scan(/\S+/)
            end
            if notedata =~ /~\[\*.+?\]~/                                           #% import into database endnotes tables
              endnote_array=notedata.scan(/~\[\*.+?\]~/)
              endnote_array.each do |inf|
                if inf[/~\[\*\d+.+?<[0-9a-f]{#{@@dl}}>\]~/]                                       # dal new endnotes 2003w31/1
                  if inf[/~\[[*](\d+)(.+?)<([0-9a-f]{#{@@dl}})>\]~/]                                       # dal new endnotes 2003w31/1
                    en,txt,digest_clean=$1,$2,$3
                  end
                  @id_n+=1
                  body=SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns],en).endnote
                  special_character_escape(body)
                  special_character_escape(txt)
                  strip_markup(txt)
                  if txt.size > (endnote_clean - 1)
                    puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
                    open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
                      error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
                    end
                    txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
                  end
                  if txt
                    #puts "'#{@id_n}', '#{@col[:lid]}', '#{en}', '#{txt}', '#{body}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@@id_t}'" #% endnotes
                    @conn.execute(%{
                      INSERT INTO endnotes_asterisk (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) VALUES ('#{@id_n}', '#{@col[:lid]}', '#{en}', '#{txt}', '#{body}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@@id_t}', '#{digest_clean}');
                    })
                  end
                end
              end
              word_mode=notedata.scan(/\S+/)
            end
            if notedata =~ /~\[\+.+?\]~/                                           #% import into database endnotes tables
              endnote_array=notedata.scan(/~\[\+.+?\]~/)
              endnote_array.each do |inf|
                if inf[/~\[\+\d+.+?<[0-9a-f]{#{@@dl}}>\]~/]                                       # dal new endnotes 2003w31/1
                  if inf[/~\[[+](\d+)(.+?)<([0-9a-f]{#{@@dl}})>\]~/]                                       # dal new endnotes 2003w31/1
                    en,txt,digest_clean=$1,$2,$3
                  end
                  @id_n+=1
                  body=SiSU_Format_Shared::CSS_Format.new(txt,@col[:ocn],@col[:ocnd],@col[:ocns],en).endnote
                  special_character_escape(body)
                  special_character_escape(txt)
                  strip_markup(txt)
                  if txt.size > (endnote_clean - 1)
                    puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
                    open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
                      error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
                    end
                    txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
                  end
                  if txt
                    #puts "'#{@id_n}', '#{@col[:lid]}', '#{en}', '#{txt}', '#{body}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@@id_t}'" #% endnotes
                    @conn.execute(%{
                      INSERT INTO endnotes_plus (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) VALUES ('#{@id_n}', '#{@col[:lid]}', '#{en}', '#{txt}', '#{body}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@@id_t}', '#{digest_clean}');
                    })
                  end
                end
              end
              word_mode=notedata.scan(/\S+/)
            end
          end
        end
      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      ensure
      end
    end
    # Records the first and last endnote numbers found in word_array in
    # @col[:en_a] and @col[:en_z] respectively. Both are reset to nil first and
    # remain nil when no element carries an endnote marker.
    #
    # word_array:: strings to scan; a marker has the form ~{N text}~ or
    #              ~[N text]~, optionally with * or + directly before the
    #              number N
    #
    # Returns word_array (the value of the each call).
    #
    # NOTE(review): the pattern requires whitespace after the number, so
    # elements produced by splitting text on whitespace could never match --
    # confirm what callers actually pass in.
    def endnote_range(word_array)
      @col[:en_a]=@col[:en_z]=nil
      word_array.each do |w|
        next unless w[/~[{\[][*+]?(\d+)\s+.+?[}\]]~/]                                       # not tested since change 2003w31
        @col[:en_a]||=$1                                                        # kept from the first match only
        @col[:en_z]=$1                                                          # overwritten on every match, so it ends as the last one
      end
    end
    # Inserts one row into the urls table listing, per output format, the URL
    # of the corresponding generated file for the current document, together
    # with the current metadata id (@@id_t).
    #
    # dbi_unit:: accepted for interface compatibility; not used by this method
    # meta::     stored in @fnm
    #
    # When @opt.cmd contains the letter e, a format is listed only if its file
    # exists under the output path; otherwise every format is listed. URLs are
    # built as <url root>/<@fnb>/<file name>.
    #
    # Side effects: sets @fnm and @env, executes an INSERT on @conn. Any
    # exception raised while doing so is handed to SiSU_Errors::Info_error
    # rather than propagated.
    def import_db_urls(dbi_unit,meta)                                           #% import documents OID - populate database
      begin
        @fnm=meta
        @env=SiSU_Env::Info_env.new(@opt.fns)
        base=@env.url.root
        out=@env.path.output
        # nil is tested first so that a missing @fnb is reported here instead of raising NoMethodError on empty?
        p 'file output path error' if @fnb.nil? or @fnb.empty? #remove
        # [urls column, output file name], listed in the column order used by the INSERT
        outputs=[
          ['plaintext',   @md.fn[:plain]],
          ['html_toc',    @md.fn[:toc]],
          ['html_doc',    @md.fn[:doc]],
          ['xhtml',       @md.fn[:xhtml]],
          ['xml_sax',     @md.fn[:sax]],
          ['xml_dom',     @md.fn[:dom]],
          ['odf',         @md.fn[:odf]],
          ['pdf_p',       @md.fn[:pdf_p]],
          ['pdf_l',       @md.fn[:pdf_l]],
          ['concordance', @md.fn[:concordance]],
          ['latex_p',     "#{@opt.fns}.tex"],
          ['latex_l',     "#{@opt.fns}.landscape.tex"],
          ['manifest',    @md.fn[:manifest]],                                   # revisit, was to be text, this is html
          ['digest',      @md.fn[:digest]],
          ['markup',      "#{@opt.fns}.meta"],
          ['sisupod',     "#{@opt.fns}.tgz"],
        ]
        must_exist=(@opt.cmd =~ /e/)                                            # with e in cmd, only list files found on disk
        listed=outputs.select do |_column,file|
          !must_exist || FileTest.file?("#{out}/#@fnb/#{file}")
        end
        columns=listed.map {|column,_file| column }
        values=listed.map {|_column,file| "'#{base}/#@fnb/#{file}'" }
        column_list=(columns + ['metadata_tid']).join(', ')
        value_list=(values + [@@id_t]).join(', ')
        @conn.execute(%{
          INSERT INTO urls (#{column_list}) VALUES (#{value_list});
        })
      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      end
    end
  end
end
__END__
