=begin
 * Name: SiSU - Simple information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download

 * Description: plaintext text generation, stripped plaintext output (unix, linefeed)

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Ralph Amissah

 * License: GPL 2 or later

  Summary of GPL 2

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation; either version 2 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
    http://www.fsf.org/licenses/gpl.html
    http://www.gnu.org/copyleft/gpl.html
    http://www.jus.uio.no/sisu/gpl2.fsf

  SiSU was first released to the public on January 4th 2005

  SiSU uses:
  
  *  Standard SiSU markup syntax,
  *  Standard SiSU meta-markup syntax, and the
  *  Standard SiSU object citation numbering and system
  
  © Ralph Amissah 1997, current 2006.
  All Rights Reserved.

 * Notes: tidy -ascii index.xml >> index.tidy

 * Ralph Amissah: ralph@amissah.com
                  ralph.amissah@gmail.com
=end
module SiSU_Plaintext
  require SiSU_lib + '/metaverse'
  require SiSU_lib + '/sysenv'
  include SiSU_Env
  include SiSU_Param
  include SiSU_Viz
  require SiSU_lib + '/plaintext_format'
  include Format
  require SiSU_lib + '/common_flatfile'
  # Module-level state, evaluated once when the module body is loaded.
  pwd=Dir.pwd # NOTE(review): not referenced again in the visible module body
  @@txt=SiSU_Viz::Txt.new
  # Counters start at zero (the original listed @@alt_id_count twice in the
  # same multiple assignment; one target is enough).
  @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
  @@tablefoot=''
  class Source
    def initialize(opt)
      @opt=opt
      if @opt.fns =~/(.+?)\.[_-]?sst$/
        case @opt.cmd
        when /[af]/: @@dostype='unix footnotes'
        when /e/:    @@dostype='unix endnotes'
        when /[AF]/: @@dostype='msdos footnotes'
        when /E/:    @@dostype='msdos endnotes'
        end
      else puts "#{sf} not a processed file type"
      end
    end
    # Drives plaintext generation for the file named in @opt:
    # loads the document parameters, reports progress on screen (unless the
    # command string contains "q"), fetches the metaverse array, hands it to
    # Scroll#songsheet and finally calls Info_skin#select.
    #
    # Any StandardError raised along the way is passed to
    # SiSU_Errors::Info_error#error rather than propagated.
    def read
      begin
        @md=SiSU_Param::Parameters.new(@opt).get
        @env=SiSU_Env::Info_dir.new(@opt.fns)
        path=SiSU_Env::Info_dir.new(@opt.fns).tell_output_path
        # "then" replaces the "if ...:" form that was removed in Ruby 1.9
        tool=if @opt.cmd =~/[MVv]/ then "#{@env.text_editor} #{path}/#{@md.fnb}/#{@md.fn[:plain]}"
        else ''
        end
        tell=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool)
        tell.green_hi_blue unless @opt.cmd =~/q/
        tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.tell_output_path}/#{@md.fnb}/#{@md.fn[:plain]}")
        tell.flow if @opt.cmd =~/[MV]/
        # result unused here; the call is kept in case the constructor has
        # side effects -- TODO confirm against SiSU_Env::Create_file
        SiSU_Env::Create_file.new(@opt.cmd,@opt.fns)
        @metaverse_array=SiSU_Metaverse::Source.new(@opt).get # metaverse file drawn here
        SiSU_Plaintext::Source::Scroll.new(@metaverse_array,@md).songsheet
        SiSU_Env::Info_skin.new(@md).select
      rescue
        SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      end
    end
    private
    # Splits one marked-up paragraph into its format tag, heading level, text
    # and object citation number (ocn), and wraps the result in a
    # Format::Format_text_object (exposed as +lev_para_ocn+).
    class Split_text_object <Source
      require SiSU_lib + '/plaintext_format'
      include SiSU_Viz
      include Format
      @@alt_id_count=0 # running counter used to build substitute "x<n>" ids
      @@dp=nil         # digest pattern, fetched lazily in initialize
      attr_reader :format,:lev,:text,:ocn,:lev_para_ocn
      # para:: the paragraph string to be analysed.
      # @format and @ocn start out as the string 'null'.
      def initialize(para)
        @para=para
        @format,@ocn='null','null'
        #@format,@ocn=nil,nil
        @@dp ||=SiSU_Env::Info_dir.new.digest_pattern
      end
      # Parses @para, setting @format, @lev, @text and @ocn from whichever
      # pattern matches first, then builds @lev_para_ocn. Returns self.
      def lev_segname_para_ocn 
        # Paragraphs that start with a level marker ("<digit>~") or a "<:tag>"
        # and carry a trailing ocn/digest block
        if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>.*/
          # heading with a segment name, e.g. "1~name text"
          if /(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/im.match(@para)
            @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5
          # heading without a segment name
          elsif  /(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/im.match(@para)
            @format,@lev,@text,@ocn=$1,$2,$3,$4
          # "<:tag>" prefixed paragraph
          elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/im.match(@para)
            # NOTE(review): four values for three targets; $4 is discarded
            # (and this pattern has only three groups)
            @format,@text,@ocn=$1,$2,$3,$4
          # NOTE(review): appears unreachable -- this pattern differs from the
          # first branch only in "[um]" where that one has "\w", and "\w" also
          # matches u and m, so the first branch wins whenever this would match
          elsif /(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#{@@dp}:#{@@dp}>/im.match(@para)
            @@alt_id_count+=1
            @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}"
            #@format="#@format~#{segname}" # 
          # NOTE(review): likewise shadowed by the second branch ("[um]\d+" is
          # a subset of "(?:\w|[0-6]:)\d+")
          elsif  /(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#{@@dp}:#{@@dp}>/im.match(@para)
            @@alt_id_count+=1
            @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
          end
        else
          # ordinary paragraph: text up to the ocn block, plus the ocn itself
          if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/im.match(@para)
            @text,@ocn=$1,$2
          end
          # paragraph without an ocn block
          if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>|^$/ #added 2002w06
            # NOTE(review): a lazy (.+?) with nothing after it captures a
            # single character only -- confirm whether the whole paragraph
            # was intended
            @text=/(.+?)/im.match(@para)[1]
          end
          # level marker without ocn block; overrides the values set above
          if /^((\d)~(?:~\S+)?)\s+(.+)/im.match(@para)
            @format,@lev,@text=$1,$2,$3
          end
        end
        format=@format.dup
        @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/
          Format::Format_text_object.new(format,@text,@ocn)
        else
          # NOTE(review): a pattern-like string is passed where the other
          # branch passes an ocn; in a double-quoted string the backslashes
          # of \d and \w are dropped -- confirm intent
          Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#{@@dp}:#{@@dp}>")
        end
        self
      end
    end
    class Scroll <Source
      require SiSU_lib + '/common_text'
      include SiSU_text_utils
      # Shared accumulators for the document being rendered.
      @@endnotes_para=[]                       # endnotes of the paragraph in hand
      @@plaintext={}                           # output sections, each a list of strings
      [:body,:open,:close,:head,:metadata,:tail,:endnotes].each { |part| @@plaintext[part]=[] }
      @@dp=nil                                 # digest pattern, fetched lazily in initialize
      # data:: the paragraphs to render (iterated by #markup)
      # md::   document parameters; md.cmd selects the output flavour
      #
      # Sets the line break (@br) and, when a flag matches, @@dostype:
      # a/f -> unix footnotes, e -> unix endnotes,
      # A/F -> msdos footnotes, E -> msdos endnotes.
      # With no matching flag @br is "\n" and @@dostype is left untouched.
      def initialize(data,md)
        @data,@md=data,md
        @margin=SiSU_Viz::Margin.new
        @@dp ||=SiSU_Env::Info_dir.new.digest_pattern
        # optional level marker (with optional page break/new page tag), text, ocn block at end of line
        @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>$/
        @url=SiSU_Viz::Url.new
        @tab="\t"
        flavour,@br=case md.cmd
        when /[af]/ then ['unix footnotes',"\n"]
        when /e/    then ['unix endnotes',"\n"]
        when /[AF]/ then ['msdos footnotes',"\r\n"]
        when /E/    then ['msdos endnotes',"\r\n"]
        else             [nil,"\n"]
        end
        @@dostype=flavour if flavour
      end
      # Runs the four processing stages in order -- pre, markup, post,
      # publish -- each on a freshly constructed Scroll built from the same
      # data and parameters. Returns whatever #publish returns.
      def songsheet
        %w[pre markup post].each { |stage| Scroll.new(@data,@md).__send__(stage) }
        Scroll.new(@data,@md).publish
      end
      # Extracts the endnotes embedded in a paragraph as
      # ~{<mark> <text> <digest>}~ and appends each, line-wrapped, to both
      # @@plaintext[:endnotes] and @@endnotes_para.
      #
      # para:: paragraph text to scan (not modified here)
      #
      # Notes containing <br> or <br /> are split at the break and each
      # non-empty piece is treated as a note of its own. A piece that starts
      # with a note mark (digits, * or +) is rewritten as "[mark]: text".
      def extract_endnotes(para='')
        notes=para.scan(/~\{([\d*+]+\s+.+?)\s*<#{@@dp}>\}~/)
        @n=Array.new
        notes.each do |n| #high cost to deal with <br> appropriately within plaintext, consider
          # scan with one group yields one-element arrays; join gives the
          # captured text on every Ruby version (Array#to_s only did so on 1.8)
          n=n.join
          if n =~/<br(?: \/)?>/
            n.split(/<br(?: \/)?>/).each do |x| #watch #added
              @n << x unless x.empty?
            end
          else @n << n
          end
        end
        notes=@n.flatten
        notes.each do |e|
          util=if e.to_s =~/^\[[\d*+]+\]:/ then SiSU_text_utils::Paragraph.new(e.to_s,70,4,1)
          else                               SiSU_text_utils::Paragraph.new(e.to_s,70,1,1)
          end
          wrap=util.line_wrap
          if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
            # blank line, then "[mark]: text"
            wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m,"\n\\1[\\2]: \\3\n")
          else
            wrap.gsub!(/^(.+)\Z/m,"\\1\n")
          end
          @@plaintext[:endnotes] << wrap
          @@endnotes_para << wrap
        end
      end
      # Appends one "<tab><meta>: <value>" entry, preceded by a blank line,
      # to @@plaintext[:metadata]. The value is line-wrapped first.
      #
      # meta::  label of the metadata field
      # verse:: value of the metadata field
      def plaintext_metadata(meta='',verse='')
        wrapped=SiSU_text_utils::Paragraph.new(verse,70,15,1).line_wrap
        @@plaintext[:metadata] << "\n#{@tab}#{meta}: #{wrapped}\n"
      end
      # Builds the closing section of the document -- links to the other
      # output formats, source-control details when @md.sc_info is set, and
      # generator/ruby/timestamp lines -- and appends it to @@plaintext[:tail].
      def plaintext_tail
        # nil (rendered as an empty string) when no version is recorded
        generated_by=@md.sisu_version[:version] ? "Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" : nil
        generated_on="Last Generated on: #{Time.now}"
        ruby_line="Ruby version: #{@md.ruby_version}"
        source_control=@md.sc_info ? "Source file:    #{@md.sc_filename}#{@br}Version number: #{@md.sc_number}#{@br}Version date:   #{@md.sc_date}#{@br}" : ''
        @@plaintext[:tail] << <<WOK
#{@br}
Other versions of this document: #{@br}
manifest:
   #{@url.root_http}/#{@md.fnb}/#{@md.fn[:manifest]}#{@br}
html:
   #{@url.root_http}/#{@md.fnb}/#{@md.fn[:toc]}#{@br}
pdf:
   #{@url.root_http}/#{@md.fnb}/#{@md.fn[:pdf_p]}
   #{@url.root_http}/#{@md.fnb}/#{@md.fn[:pdf_l]}#{@br}
plaintext (plain text):
   #{@url.root_http}/#{@md.fnb}/#{@md.fn[:plain]}#{@br}
at:
   #{@url.site}#{@br}

#{source_control}
* #{generated_by}
* #{ruby_line}
* #{generated_on}
* SiSU #{@url.sisu}
WOK
      end
      # Renders one paragraph into @@plaintext[:body].
      #
      # para::  paragraph text; modified in place (endnote bodies are
      #         replaced by "[^mark]" markers)
      # lv::    heading level 1-6, or nil/''/0 for ordinary text
      # ocn::   accepted for interface compatibility; not used in this method
      # hname:: accepted for interface compatibility; not used in this method
      #
      # Headings are upper-cased and underlined (at most 70 characters) with
      # "*" (level 1), "=" (2-3), "-" (4) or "." (5-6). Depending on
      # @@dostype the paragraph's endnotes are written straight after it
      # (footnotes) or only spacing is added (endnotes). @@endnotes_para is
      # cleared afterwards.
      def plaintext_structure(para='',lv='',ocn='',hname='') #% Used to extract the structure of a document
        lv=lv.to_i
        lv=nil if lv == 0
        extract_endnotes(para)
        para.gsub!(/~\{([\d*+]+)\s+(?:.+?)\}~/,'[^\1]') # endnote marker marked up
        # wrapped stays nil when para does not match @regx
        wrapped=if para[@regx]
          paragraph="#{para[@regx,2]}"
          if paragraph.include? '<:i1>'
            paragraph.gsub!(/<:i1>/,'')
            util=SiSU_text_utils::Paragraph.new(paragraph,70,2)
          else util=SiSU_text_utils::Paragraph.new(paragraph,70,0)
          end
          util.line_wrap
        end
        if lv
          times=wrapped.length
          times=70 if times > 70
          # "when ... then" replaces the "when ...:" form that was removed in Ruby 1.9
          @@plaintext[:body] << case lv
          when 1    then wrapped.upcase << @br << '*'*times << @br
          when 2..3 then wrapped.upcase << @br << '='*times << @br
          when 4    then wrapped.upcase << @br << '-'*times << @br
          when 5..6 then wrapped.upcase << @br << '.'*times << @br
          end
        else
          @@plaintext[:body] << wrapped << @br # main text, contents, body KEEP
        end
        if @@endnotes_para and @@dostype =~/footnote/ #edit out to switch off endnotes following paragraph to which they belong
          @@plaintext[:body] << @br
          @@endnotes_para.each {|e| @@plaintext[:body] << e << @br}
        elsif @@endnotes_para and @@dostype =~/endnote/
          @@plaintext[:body] << @br*2
        end
        @@endnotes_para=Array.new
      end
      # Word-cleaning hook. The only substitution it once applied is
      # disabled, so the list is walked and handed back unchanged.
      #
      # wordlist:: collection of words (must respond to +each+)
      # Returns the same wordlist object.
      def tidywords(wordlist)
        # disabled: x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
        wordlist.each { |_word| }
      end
      # Main markup pass over @data.
      #
      # For every paragraph it
      # * converts or strips inline markup in place (bold, italics, links,
      #   page breaks, images, table markers, ...),
      # * collapses whitespace into a fresh string (from that point on the
      #   element held in @data is no longer modified),
      # * records Dublin Core metadata found in "0~" header lines via
      #   #plaintext_metadata, and
      # * passes paragraphs that carry an ocn block to #plaintext_structure,
      #   until a paragraph containing "~metadata" or
      #   "1~ Document Information" has been seen (@rcdc).
      #
      # #plaintext_tail is called once before the loop.
      # Results accumulate in @@plaintext.
      def markup                                                               # Used for major markup instructions
        data=@data
        dir=SiSU_Env::Info_dir.new(@md.fns)
        @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new,Array.new,Array.new,Array.new,Array.new,Array.new
        @rcdc=false
        (0..6).each { |x| @cont[x]=@level[x]=false }
        (4..6).each { |x| @plaintext_contents_close[x]='' }
        plaintext_tail #($1,$2)
        #data.each do |para| #high cost to deal with <br> appropriately within plaintext, consider
        #  para=para.dup
        #  if para =~/<br(?: \/)?>/
        #    p para
        #    fix = para.split(/<br(?: \/)?>/) #watch #added
        #    fix.each do |x|
        #      if x =~/\S+/: @data_mod << x
        #      end
        #    end
        #  else              @data_mod << para
        #  end
        #end
        #data=@data_mod.flatten
        data.each do |para|
          para.gsub!(/.+?<-#>/i,'')                                           # remove dummy headings (used by html) #check
          para.gsub!(/_\*\s+/,'* ')                                           # bullet markup, marked down
          para.gsub!(/&#169;/,'©')                                           # copyright entity to character
          para.gsub!(/&amp;/,'&')                                             # ampersand entity to character
          para.gsub!(/<sup>(.+?)<\/sup>/,"^\\1^")
          para.gsub!(/<sub>(.+?)<\/sub>/,"[\\1]")
          para.gsub!(/<i>(.+?)<\/i>/,"/\\1/")
          para.gsub!(/<b>(.+?)<\/b>/,"*\\1*")
          para.gsub!(/<u>(.+?)<\/u>/,"_\\1_")
          para.gsub!(/<:(?:group|poem|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>)?/,'')
          para.gsub!(/<:p[bn]>/mi,'')                                         # remove page breaks
          para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/i,'') # remove empty lines
          para.gsub!(/<a href=".+?">(.+?)<\/a>/im,'\1')
          para.gsub!(/<:name#\S+?>/,'')                                       # remove name links
          para.gsub!(/&nbsp;/,' ')                                            # decide on
          para.gsub!(/\{(\S+?\.(?:png|jpg)) .+?\}(?:(?:https?|ftp):\/\/\S+|image)/,"    [ \\1 ]") #"[ #{dir.url_images_local}\/\\1 ]")
          para.gsub!(/^\{\S+?\.(?:png|jpg)\s+.+?"(.*?)"\s*\}\S+/,"[image: \"\\1\"]")
          para.gsub!(/<!TZ.+/i,'')
          para.gsub!(/^<!T.+/,"#@br[table: ] <~#>")
          wordlist=para.scan(/\S+/)
          para=tidywords(wordlist).join(' ').strip                            # new string; whitespace runs become single spaces
          if para =~/^0~(\S+)\s+(.+?)$/ # for headers
            dc=SiSU_scan::Header_scan.new(@md,para).dublin
            # modifier form replaces the "if dc:" form that was removed in Ruby 1.9
            plaintext_metadata(dc[0],dc[1]) if dc
          end
          @rcdc=true if @rcdc==false and (para =~/~metadata/ or para =~/1~\s+Document Information/)
          if para !~/(^0~|<ENDNOTES>|<EOF>)/
            if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change
              paranum=para[@regx,3]
              @p_num=Format::Paragraph_number.new(paranum)
            end
            @sto=Split_text_object.new(para).lev_segname_para_ocn
            ### problem in scroll, it appears tables are getting paragraph numbers
            unless @rcdc
              m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/
              if para =~m and para=~/\S+/
                para=case @sto.format
                when /^(1)~(?:(\S+))?/
                  plaintext_structure(para,$1,@sto.ocn,$2)
                  @sto.lev_para_ocn.heading_body1
                when /^(2)~(?:(\S+))?/
                  plaintext_structure(para,$1,@sto.ocn,$2)
                  @sto.lev_para_ocn.heading_body2
                when /^(3)~(?:(\S+))?/
                  plaintext_structure(para,$1,@sto.ocn,$2)
                  @sto.lev_para_ocn.heading_body3
                when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object
                  plaintext_structure(para,$1,@sto.ocn,$2)
                  @sto.lev_para_ocn.heading_body4
                when /^(5)~(?:(\S+))?/
                  plaintext_structure(para,$1,@sto.ocn,$2)
                  @sto.lev_para_ocn.heading_body5
                when /^(6)~(?:(\S+))?/
                  plaintext_structure(para,$1,@sto.ocn,$2)
                  @sto.lev_para_ocn.heading_body6
                #when /^(i1)$/i
                #  #formatMono.gsubBody
                #  #para=@sto[:lev_para_ocn].scrIndent1
                #when /^(i2)$/i
                #  formatMono.gsubBody
                #  para=@sto[:lev_para_ocn].scrIndent2
                #when /^(center)$/i
                #  para.gsub!(/(.+)/,
                #    %{<center>(\\1)</center>})
                #  para=@sto[:lev_para_ocn].scrPara
                #when /^(b|bold)$/i
                #  para.gsub!(/(.+)/,
                #    %{<b>(\\1)</b>})
                #  para=@sto[:lev_para_ocn].scrPara
                #when /null/ # see whether u can improve
                #    if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
                #      #formatMono.gsubBody
                #      #para=@sto[:lev_para_ocn].scrPara
                #    end
                else
                  plaintext_structure(para,nil,nil,nil) #watch may be problematic
                  para
                end
              elsif para =~/(Note|Endnotes?)/ and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/
              elsif para =~/(MetaData)/ and para =~/<~(\d+);[um]\d+;\w\d+><#{@@dp}:#{@@dp}>/ #debug 2003w46 add rc info ####suspect visit
                #formatMono=MonoSiSU.new('<br /><a name="metadata">MetaData</a>')
                #para=formatMono.bold_para
              elsif para.include? 'Owner Details' and para !~/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>/
                #formatMono=MonoSiSU.new('<br /><a name="owner.details">Owner Details</a>')
                #@@plaintext[:owner_details]=formatMono.bold_para
                #para=''
              elsif para =~/(¡|<!Th?)/i #tables !
              elsif para =~/(.*)<!#!>(.*)/i
                one,two=$1,$2
                format_text=Format_text_object.new(one,two)
                para=format_text.seg_no_paranum
              end
              para='' if (para =~/<a name="n\d+">/ and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
              case para
              when /<:i1>/
                if (para =~/.*<:#>.*$/)
                  format_text=Format_text_object.new(para,'')
                  para=format_text.scr_indent_one_no_paranum
                end
              when /<:i2>/
                if (para =~/.*<:#>.*$/)
                  format_text=Format_text_object.new(para,'')
                  # NOTE(review): same call as the <:i1> branch -- confirm
                  # whether a two-level indent variant was intended
                  para=format_text.scr_indent_one_no_paranum
                end
              end
              if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
                # i don't get the condition for no paranum
              end
              if para =~/<:center>/i
                one,two=/(.*)<:center>(.*)/i.match(para)[1,2]
                format_text=Format_text_object.new(one,two)
                para=format_text.center
              end
            end
            para.gsub!(/<!.+!>/i,' ') if para ## Clean Prepared Text
            para.gsub!(/<:\S+>/i,' ') if para ## Clean Prepared Text
          end
        end
      end
      # Stage run by #songsheet before #markup; currently does nothing.
      def pre; end
      # Stage run by #songsheet after #markup; currently does nothing.
      def post; end
      # Assembles the accumulated sections in output order -- open, head,
      # body, endnotes (only when @@dostype asks for endnotes), metadata and
      # tail, separated by 70-character "=" rules -- writes them through
      # Output#plaintext and then clears the per-document accumulators.
      def publish
        divider="="
        content=Array.new
        content << @@plaintext[:open]
        content << @@plaintext[:head]
        content << @@plaintext[:body]
        content << @@plaintext[:endnotes] if @@dostype =~/endnotes/
        content << "#@br#{divider*70}#@br"
        content << @@plaintext[:metadata]
        content << "#@br#{divider*70}#@br" if @md.stmp =~/\w+/ #not used?
        content << @@plaintext[:owner_details] if @md.stmp =~/\w+/ #not used?
        content << "#@br#{divider*70}#@br" if @@plaintext[:tail]
        content << @@plaintext[:tail]
        # join flattens the nested section arrays into one string on every
        # Ruby version (Array#to_s only did so on 1.8); nil entries become ''
        Output.new(content.join,@md).plaintext
        # endnotes were previously never cleared, so notes from one document
        # carried over into the next one processed in the same run
        @@plaintext[:endnotes]=Array.new
        @@plaintext[:head],@@plaintext[:body],@@plaintext[:tail],@@plaintext[:metadata]=Array.new,Array.new,Array.new,Array.new
      end
    end
    class Output <Source
      include SiSU_Param
      include SiSU_Env
      # content:: the finished plaintext document
      # md::      document parameters (read by #plaintext for the output file name)
      def initialize(content,md)
        @content=content
        @md=md
      end
      # Writes @content, one line at a time, to the plaintext output file.
      #
      # SiSU_file#mkdir is called first, then SiSU_file#mkfile for
      # @md.fn[:plain]; each line of @content is passed to +puts+ on the
      # object mkfile returns. Returns @content.
      def plaintext                                                            #%plaintext output
        SiSU_Env::SiSU_file.new(@md).mkdir
        filename_plaintext=SiSU_Env::SiSU_file.new(@md,@md.fn[:plain]).mkfile
        # String#each was removed in Ruby 1.9; each_line exists on 1.8 too.
        # Anything without each_line (e.g. an Array) is still walked with each.
        walker=@content.respond_to?(:each_line) ? :each_line : :each
        begin
          # every branch of the former if/else issued this same puts
          @content.__send__(walker) { |para| filename_plaintext.puts para }
        ensure
          # release the handle instead of leaving it to the garbage collector
          filename_plaintext.close if filename_plaintext.respond_to?(:close)
        end
      end
    end
  end
end
__END__
