=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: modules shared by flatfile output generators

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

  Summary of GPL 2

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation; either version 2 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

  If you have an Internet connection, the latest version of the GPL should be
  available at these locations:
    http://www.fsf.org/licenses/gpl.html
    http://www.gnu.org/copyleft/gpl.html
    http://www.jus.uio.no/sisu/gpl2.fsf

  SiSU was first released to the public on January 4th 2005

  SiSU uses:

  *  Standard SiSU markup syntax,
  *  Standard SiSU meta-markup syntax, and the
  *  Standard SiSU object citation numbering and system

  © Ralph Amissah 1997, current 2007.
  All Rights Reserved.

 * Ralph Amissah: ralph@amissah.com
                  ralph.amissah@gmail.com
=end
module SiSU_text_utils
  # Re-flows a paragraph into lines of limited width.
  #
  # para::       text to wrap; any run of whitespace separates two words
  # n_char_max:: line width budget; the text part of each line is limited to
  #              n_char_max - n_indent characters (a single word longer than
  #              that is kept whole on a line of its own)
  # n_indent::   number of spaces put in front of every line after the first
  # n_hang::     number of spaces put in front of the first line; falls back
  #              to n_indent when not given
  class Wrap
    def initialize(para='',n_char_max=76,n_indent=0,n_hang=nil)
      @para=para
      @n_char_max=n_char_max
      @n_indent=n_indent
      @br="\n"
      @n_hang=(n_hang || @n_indent)
    end
    # Returns the wrapped paragraph as a single string: the first line is
    # prefixed with the hang spaces, following lines are joined with a
    # newline plus the indent spaces.
    def line_wrap
      continuation="#{@br}#{' ' * @n_indent}"
      first_line_pad=' ' * @n_hang
      rows=['']
      @para.scan(/\S+/).each do |token|
        current=rows.last
        # length the line would have with one joining space and this word
        too_long=(current.length + 1 + token.length) > (@n_char_max - @n_indent)
        if current.empty? then rows[-1]=token                   # first word never forces a break
        elsif too_long    then rows << token                    # start a new line
        else                   rows[-1]="#{current} #{token}"
        end
      end
      rows.join(continuation).gsub(/\A\n+/m,'').insert(0,first_line_pad)
    end
    # Wraps with indent and hang both fixed at 2 (overwrites the instance settings).
    def line_wrap_indent1
      @n_indent=2
      @n_hang=2
      line_wrap
    end
    # Wraps with indent 4 and hang 2 (overwrites the instance settings).
    def line_wrap_endnote
      @n_indent=4
      @n_hang=2
      line_wrap
    end
  end
#end
#module SiSU_scan
  # Recognises a SiSU document header in a paragraph and exposes the result
  # through reader methods (label/element/el, text/info, type, attribute/attrib).
  #
  # Two header syntaxes are handled: "0~name value" (start_is_zero) and
  # "@name: value" (start_is_at). Use #meta or #dublin as the entry point;
  # both return this object after a successful match, or nil when the
  # paragraph holds no known header.
  class Header_scan
    # md::   document metadata object; only #dc_title is called on it here
    # para:: paragraph text; every object-citation/digest marker matching
    #        @regxcl is removed before scanning
    def initialize(md,para)
      @regxcl=/<~\d+;\w\d+;\w\d+><(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})>/
      @md,@p=md,para.gsub(@regxcl,'') # gsub already returns a new string
      @tag=@tag_content=@type=@attrib=nil
    end
    # Returns [tag, tag_content, {tag=>tag_content}] when both tag and
    # tag_content are set, otherwise nil. type and attrib are accepted but
    # not used. (Previously referred to undefined names and always raised.)
    def extract(tag,tag_content,type,attrib)
      return nil unless tag and tag_content
      [tag,tag_content,{tag=>tag_content}]
    end
    # Records the matched header and returns self so the readers below can
    # be called on the result.
    def header(tag,tag_content,type='',attrib='') #this will break stuff and must be tested thoroughly 20060825
      @tag,@tag_content,@type,@attrib=tag,tag_content,type,attrib
      self
    end
    # Readers for the values stored by #header; nil until a header matched.
    def label;     @tag;         end #element
    def element;   @tag;         end
    def el;        @tag;         end
    def type;      @type;        end
    def text;      @tag_content; end
    def info;      @tag_content; end #element text
    def attribute; @attrib;      end
    def attrib;    @attrib;      end
    # Matches "0~name value" headers. The first pattern that matches wins;
    # returns self (via #header) or nil. For title the text comes from
    # @md.dc_title rather than from the paragraph.
    def start_is_zero
      case @p
      when /^0~(title)\s+(.+?)$/;               header($1,@md.dc_title,'meta','dc') #dc 1
      #when /^0~(subtitle)\s+(.+?)$/;            header($1,$2)
      when /^0~(creator|author)\s+(.+?)$/;      header('creator',$2,'meta','dc')    #dc 2
      when /^0~(subject)\s+(.+?)$/;             header($1,$2,'meta','dc')           #dc 3
      when /^0~(description)\s+(.+?)$/;         header($1,$2,'meta','dc')           #dc 4
      when /^0~(publisher)\s+(.+?)$/;           header($1,$2,'meta','dc')           #dc 5
      when /^0~(contributor)\s+(.+?)$/;         header($1,$2,'meta','dc')           #dc 6
      when /^0~(date)\s+(.+?)$/;                header($1,$2,'meta','dc')           #dc 7
      when /^0~(date\.created)\s+(.+?)$/;       header($1,$2,'meta','extra')
      when /^0~(date\.issued)\s+(.+?)$/;        header($1,$2,'meta','extra')
      when /^0~(date\.available)\s+(.+?)$/;     header($1,$2,'meta','extra')
      when /^0~(date\.valid)\s+(.+?)$/;         header($1,$2,'meta','extra')
      when /^0~(date\.modified)\s+(.+?)$/;      header($1,$2,'meta','extra')
      when /^0~(type)\s+(.+?)$/;                header($1,$2,'meta','dc')           #dc 8
      when /^0~(format)\s+(.+?)$/;              header($1,$2,'meta','dc')           #dc 9
      when /^0~(identifier)\s+(.+?)$/;          header($1,$2,'meta','dc')           #dc 10
      when /^0~(source)\s+(.+?)$/;              header($1,$2,'meta','dc')           #dc 11
      when /^0~(language)\s+(.+?)$/;            header($1,$2,'meta','dc')           #dc 12
      when /^0~(relation)\s+(.+?)$/;            header($1,$2,'meta','dc')           #dc 13
      when /^0~(coverage)\s+(.+?)$/;            header($1,$2,'meta','dc')           #dc 14
      when /^0~(rights)\s+(.+?)$/;              header($1,$2,'meta','dc')           #dc 15
      when /^0~(keywords)\s+(.+?)$/;            header($1,$2,'meta','extra')
      when /^0~(copyright)\s+(.+?)$/;           header($1,$2,'meta','extra')
      when /^0~(translator|translated_by)\s+(.+?)$/;   header('translator',$2,'meta','extra')
      when /^0~(illustrator|illustrated_by)\s+(.+?)$/; header('illustrator',$2,'meta','extra')
      when /^0~(prepared_by)\s+(.+?)$/;         header($1,$2,'meta','extra')
      when /^0~(digitized_by)\s+(.+?)$/;        header($1,$2,'meta','extra')
      when /^0~(comments?)\s+(.+?)$/;           header($1,$2,'meta','extra')
      when /^0~(abstract)\s+(.+?)$/;            header($1,$2,'meta','extra')
      when /^0~(tags?)\s+(.+?)$/;               header($1,$2,'meta','extra')
      when /^0~(catalogue)\s+(.+?)$/;           header($1,$2,'meta','extra')
      when /^0~(class(?:ify)?_loc)\s+(.+?)$/;   header('classify_loc',$2,'meta','extra')
      when /^0~(class(?:ify)?_dewey)\s+(.+?)$/; header('classify_dewey',$2,'meta','extra')
      when /^0~(class(?:ify)?_pg)\s+(.+?)$/;    header('classify_pg',$2,'meta','extra')
      when /^0~(class(?:ify)?_isbn)\s+(.+?)$/;  header('classify_isbn',$2,'meta','extra')
      # NOTE(review): a second, identical toc|structure pattern (tagged
      # 'proc','instruct') followed this one and could never be reached; it
      # has been removed. start_is_at tags structure as 'process','instruct'
      # -- confirm whether 'meta','extra' is really intended here.
      when /^0~(toc|structure)\s+(.+?)$/;       header('structure',$2,'meta','extra')
      when /^0~(level|page|markup)\s+(.+?)$/;   header('markup',$2,'process','instruct')
      when /^0~(bold)\s+(.+?)$/;                header($1,$2,'process','instruct')
      when /^0~(italics|itali[sz]e)\s+(.+?)$/;  header('italicize',$2,'process','instruct')
      when /^0~(vocabulary|wordlist)\s+(.+?)$/; header('vocabulary',$2,'process','instruct')
      when /^0~(skin)\s+(.+?)$/;                header($1,$2,'process','instruct')
      when /^0~(css|stylesheet)\s+(.+?)$/;      header('css',$2,'process','instruct')
      when /^0~(links)\s+(.+?)$/;               header($1,$2,'process','instruct')
      when /^0~(prefix)\s+(.+?)$/;              header($1,$2,'process','instruct')
      when /^0~(suffix)\s+(.+?)$/;              header($1,$2,'process','instruct')
      when /^0~(information)\s+(.+?)$/;         header($1,$2,'process','instruct')
      when /^0~(contact)\s+(.+?)$/;             header($1,$2,'process','instruct')
      when /^0~(rcs|cvs)\s+(.+?)$/;             header('version',$2,'process','instruct')
      else nil
      end
    end
    # Matches "@name: value" headers; same contract as #start_is_zero.
    # translator/illustrator now carry 'meta','extra' like their "0~" forms
    # (they previously fell back to empty type and attrib).
    def start_is_at
      case @p
      when /^@(title):\s+(.+?)$/;               header($1,@md.dc_title,'meta','dc') #dc 1
      #when /^@(subtitle):\s+(.+?)$/;            header($1,$2,'meta','extra')
      when /^@(creator|author):\s+(.+?)$/;      header('creator',$2,'meta','dc')    #dc 2
      when /^@(subject):\s+(.+?)$/;             header($1,$2,'meta','dc')           #dc 3
      when /^@(description):\s+(.+?)$/;         header($1,$2,'meta','dc')           #dc 4
      when /^@(publisher):\s+(.+?)$/;           header($1,$2,'meta','dc')           #dc 5
      when /^@(contributor):\s+(.+?)$/;         header($1,$2,'meta','dc')           #dc 6
      when /^@(date):\s+(.+?)$/;                header($1,$2,'meta','dc')           #dc 7
      when /^@(date\.created):\s+(.+?)$/;       header($1,$2,'meta','extra')
      when /^@(date\.issued):\s+(.+?)$/;        header($1,$2,'meta','extra')
      when /^@(date\.available):\s+(.+?)$/;     header($1,$2,'meta','extra')
      when /^@(date\.valid):\s+(.+?)$/;         header($1,$2,'meta','extra')
      when /^@(date\.modified):\s+(.+?)$/;      header($1,$2,'meta','extra')
      when /^@(type):\s+(.+?)$/;                header($1,$2,'meta','dc')           #dc 8
      when /^@(format):\s+(.+?)$/;              header($1,$2,'meta','dc')           #dc 9
      when /^@(identifier):\s+(.+?)$/;          header($1,$2,'meta','dc')           #dc 10
      when /^@(source):\s+(.+?)$/;              header($1,$2,'meta','dc')           #dc 11
      when /^@(language):\s+(.+?)$/;            header($1,$2,'meta','dc')           #dc 12
      when /^@(relation):\s+(.+?)$/;            header($1,$2,'meta','dc')           #dc 13
      when /^@(coverage):\s+(.+?)$/;            header($1,$2,'meta','dc')           #dc 14
      when /^@(rights):\s+(.+?)$/;              header($1,$2,'meta','dc')           #dc 15
      when /^@(keywords):\s+(.+?)$/;            header($1,$2,'meta','extra')
      when /^@(copyright):\s+(.+?)$/;           header($1,$2,'meta','extra')
      when /^@(translator|translated_by):\s+(.+?)$/;   header('translator',$2,'meta','extra')
      when /^@(illustrator|illustrated_by):\s+(.+?)$/; header('illustrator',$2,'meta','extra')
      when /^@(prepared_by):\s+(.+?)$/;         header($1,$2,'meta','extra')
      when /^@(digitized_by):\s+(.+?)$/;        header($1,$2,'meta','extra')
      when /^@(comments?):\s+(.+?)$/;           header($1,$2,'meta','extra')
      when /^@(abstract):\s+(.+?)$/;            header($1,$2,'meta','extra')
      when /^@(tags?):\s+(.+?)$/;               header($1,$2,'meta','extra')
      when /^@(catalogue):\s+(.+?)$/;           header($1,$2,'meta','extra')
      when /^@(class(?:ify)?_loc):\s+(.+?)$/;   header('classify_loc',$2,'meta','extra')
      when /^@(class(?:ify)?_dewey):\s+(.+?)$/; header('classify_dewey',$2,'meta','extra')
      when /^@(class(?:ify)?_pg):\s+(.+?)$/;    header('classify_pg',$2,'meta','extra')
      when /^@(class(?:ify)?_isbn):\s+(.+?)$/;  header('classify_isbn',$2,'meta','extra')
      when /^@(toc|structure):\s+(.+?)$/;       header('structure',$2,'process','instruct')
      when /^@(level|page|markup):\s+(.+?)$/;   header('markup',$2,'process','instruct')
      when /^@(bold):\s+(.+?)$/;                header($1,$2,'process','instruct')
      when /^@(italics|itali[sz]e):\s+(.+?)$/;  header('italicize',$2,'process','instruct')
      when /^@(vocabulary|wordlist):\s+(.+?)$/; header('vocabulary',$2,'process','instruct')
      when /^@(skin):\s+(.+?)$/;                header($1,$2,'process','instruct')
      when /^@(css|stylesheet):\s+(.+?)$/;      header('css',$2,'process','instruct')
      when /^@(links):\s+(.+?)$/;               header($1,$2,'process','instruct')
      when /^@(prefix):\s+(.+?)$/;              header($1,$2,'process','instruct') #add a & b
      when /^@(suffix):\s+(.+?)$/;              header($1,$2,'process','instruct')
      when /^@(information):\s+(.+?)$/;         header($1,$2,'process','instruct')
      when /^@(contact):\s+(.+?)$/;             header($1,$2,'process','instruct')
      when /^@(rcs|cvs):\s+(.+?)$/;             header('version',$2,'process','instruct')
      else nil
      end
    end
    # Chooses the scanner by the paragraph's header syntax; returns self on
    # a recognised header, nil otherwise. The "@name:" test also lets
    # through an optional + or - after the colon, which none of the
    # start_is_at patterns accept, so such lines yield nil.
    def dublin
      if @p =~/^0~\S+\s/;                      start_is_zero
      elsif @p =~/^@\S+:[+-]?\s/;              start_is_at
      end
    end
    # Same result as #dublin.
    def meta
      dublin
    end
  end
end
module SiSU_text_parts_flatfile
  # Splits a paragraph into its format marker, text and object citation
  # number (ocn), and builds a Format::ParaSiSU from the parts.
  #
  # Readers: format and ocn start as the string 'null'; text is nil until
  # #lev_segname_para_ocn has found one; lev_para_ocn holds the
  # Format::ParaSiSU built by that method.
  class Split_text_object
    @@dl=nil # digest length used in the marker pattern, looked up once and shared
    attr_reader :format,:text,:ocn,:lev_para_ocn
    # md::   document metadata object, passed through to Format::ParaSiSU
    # para:: paragraph text, optionally ending in a "<~ocn;..;..><digest:digest>" marker
    def initialize(md,para)
      @md,@para=md,para
      @format,@ocn='null','null'
      #@format,@ocn=nil,nil
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Parses @para, sets format/text/ocn/lev_para_ocn and returns self.
    def lev_segname_para_ocn
      # "<~ocn;x..;x..><digest:digest>" marker; the only capture is the ocn
      ocn_mark=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/
      if @para =~/^(\d~|<:.+?>).+?#{ocn_mark}.*/
        # Paragraph opens with a "n~" level marker or a "<:...>" tag and carries an ocn.
        # (Two further branches for ocn 0 used to follow; they repeated the
        # first two patterns with a narrower ocn and so could never be
        # reached. They have been removed.)
        if (m=/^([1-6])~(\S+)\s+(\S.+?)#{ocn_mark}/m.match(@para))
          @format,@text,@ocn="#{m[1]}~#{m[2]}",m[3],m[4] # level with segment name
        elsif (m=/^([1-6]~)\s+(\S.+?)#{ocn_mark}/m.match(@para))
          @format,@text,@ocn=m[1],m[2],m[3]
        elsif (m=/<:(.+?)>\s*(\S.+?)#{ocn_mark}/m.match(@para))
          @format,@text,@ocn=m[1],m[2],m[3]
        end
      else
        if (m=/(.+?)#{ocn_mark}/m.match(@para))
          @text,@ocn=m[1],m[2]
        end
        if @para !~/#{ocn_mark}|^$/ #added 2002w06
          # No marker and no empty line: take the whole paragraph as text.
          # (The capture used to be lazy with nothing after it, which
          # yielded only the first character.)
          @text=/(.+)/m.match(@para)[1]
        end
        if (m=/^(\d)~\S*\s+(.+)/m.match(@para))
          @format,@text=m[1],m[2]
        end
      end
      # NOTE(review): /<~\d+>/ does not match the "<~n;...>" marker form
      # handled above, so for such paragraphs the '<~0>' branch is taken and
      # @ocn is not passed on -- confirm this is intended ("hmmm, watch").
      @lev_para_ocn=if @para =~/.+<~\d+>/ #hmmm, watch
        Format::ParaSiSU.new(@md,@format,@text,@ocn)
      else Format::ParaSiSU.new(@md,@format,@text,'<~0>')
      end
      self
    end
  end
end
__END__

