/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999 The Apache Software Foundation.  All rights 
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:  
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written 
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

package org.apache.xerces.readers;

import org.apache.xerces.framework.XMLErrorReporter;
import org.apache.xerces.utils.CharDataChunk;
import org.apache.xerces.utils.StringPool;
import org.apache.xerces.utils.ImplementationMessages;
import java.io.InputStream;

/**
 * Simple character-based version of a UTF8 reader.
 *
 * This class is not commonly used, but is provided as a much simplified
 * example of the UTF8Reader class that uses the AbstractCharReader to
 * perform all of the reader functions except for filling each buffer
 * of the character data when needed (fillCurrentChunk).  We read the
 * input data from an InputStream and perform end-of-line normalization
 * as we process that data.
 *
 * @version
 */
final class UTF8CharReader extends AbstractCharReader {
    //
    //
    //
    /**
     * Creates a reader over a stream of UTF-8 bytes and immediately loads
     * the first chunk of character data.
     *
     * @param entityHandler           forwarded unchanged to the superclass constructor
     * @param errorReporter           forwarded unchanged to the superclass constructor
     * @param sendCharDataAsCharArray forwarded unchanged to the superclass constructor
     * @param dataStream              the stream the UTF-8 encoded bytes are read from
     * @param stringPool              forwarded unchanged to the superclass constructor
     * @throws Exception if the initial call to fillCurrentChunk() fails (the
     *                   superclass constructor is not visible here and may
     *                   throw as well)
     */
    UTF8CharReader(XMLEntityHandler entityHandler,
                   XMLErrorReporter errorReporter,
                   boolean sendCharDataAsCharArray,
                   InputStream dataStream,
                   StringPool stringPool) throws Exception {
        super(entityHandler, errorReporter, sendCharDataAsCharArray, stringPool);
        this.fInputStream = dataStream;
        // Prime the reader so the first character is available right away.
        this.fillCurrentChunk();
    }
    //
    //
    //
    // Source of the UTF-8 bytes.  Assigned in the constructor; closed and set
    // to null by fillCurrentChunk() once the end of the stream is reached.
    private InputStream fInputStream = null;
    //
    // When we fill a chunk there may be data that was read from the
    // input stream that has not been "processed".  We need to save
    // that data, and any in-progress state, between the calls to
    // fillCurrentChunk() in these instance variables.  
    //
    // Set by fillCurrentChunk() when the next call must first deal with what
    // was left behind in fOverflow (or must still deliver the end-of-data nul).
    private boolean fCheckOverflow = false;
    // Raw input bytes most recently read from fInputStream (CHUNK_SIZE long).
    private byte[] fOverflow = null;
    // Index in fOverflow of the next byte that has not been processed yet.
    private int fOverflowOffset = 0;
    // Number of valid bytes in fOverflow; copyNormalize() stops here.
    private int fOverflowEnd = 0;
    // Index in the output char array where the next character will be stored.
    private int fOutputOffset = 0;
    // True when a CR was the last byte of the input buffer: if the next
    // buffer starts with LF, that LF belongs to the same line end and is dropped.
    private boolean fSkipLinefeed = false;
    // Number of bytes of an incomplete multi-byte sequence saved in
    // fPartialMultiByteChar (0 = none).  handlePartialMultiByteChar() also
    // treats the value 4 as "a low surrogate is waiting in fPartialSurrogatePair".
    private int fPartialMultiByteIn = 0;
    // Bytes of the incomplete sequence, lead byte at index 0.
    private byte[] fPartialMultiByteChar = new byte[3];
    // Low surrogate of a pair whose high surrogate filled the output buffer.
    private int fPartialSurrogatePair = 0;
    // Value copyNormalize() returns when one of the multi-byte handlers
    // returns false.
    private boolean fPartialMultiByteResult = false;
    //
    //
    //
    /**
     * Fills the current chunk with the next run of decoded, end-of-line
     * normalized characters and returns the first character of that chunk.
     *
     * Bytes are read from fInputStream into fOverflow in blocks of up to
     * CharDataChunk.CHUNK_SIZE and converted by copyNormalize().  Input that
     * does not fit in the output array is kept in fOverflow and finished on
     * the next call (fCheckOverflow).  When the input is exhausted a nul
     * character is stored after the last character to mark the end of the
     * data; if the output array has no room for it, it is delivered as a
     * chunk of its own on the next call.
     *
     * An IOException from read() is treated exactly like end of stream.
     *
     * @return the first character of the newly filled chunk
     * @throws Exception if closing the stream fails or copyNormalize() throws
     */
    protected int fillCurrentChunk() throws Exception {
        //
        // See if we can find a way to reuse the buffer that may have been returned
        // with a recycled data chunk.
        //
        char[] recycledData = fCurrentChunk.toCharArray();
        //
        // If we have overflow from the last call, normalize from where
        // we left off, copying into the front of the output buffer.
        //
        fOutputOffset = 0;
        if (fCheckOverflow) {
            //
            // The fOverflowEnd should always be equal to CHUNK_SIZE, unless we hit
            // EOF during the previous call.  Copy the remaining data to the front
            // of the buffer and return it as the final chunk.
            //
            fMostRecentData = recycledData;
            if (fOverflowEnd < CharDataChunk.CHUNK_SIZE) {
                recycledData = null;
                if (fOverflowEnd > 0) {
                    // One extra slot is reserved for the terminating nul.
                    if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd - fOverflowOffset)
                        fMostRecentData = new char[1 + fOverflowEnd - fOverflowOffset];
                    copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                } else {
                    // Nothing left but the end-of-data marker itself.
                    if (fMostRecentData == null)
                        fMostRecentData = new char[1];
                }
                fMostRecentData[fOutputOffset] = 0;
                //
                // Update our instance variables
                //
                fOverflow = null;
                fLength += fOutputOffset;
                fCurrentIndex = 0;
                fCurrentChunk.setCharArray(fMostRecentData);
                return (fMostRecentChar = fMostRecentData[0]);
            }
            if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
                fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
            else
                recycledData = null;
            copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
            fCheckOverflow = false;
        } else {
            if (fOverflow == null)
                fOverflow = new byte[CharDataChunk.CHUNK_SIZE];
            fMostRecentData = null;
        }
        while (true) {
            fOverflowOffset = 0;
            fOverflowEnd = 0;
            int capacity = CharDataChunk.CHUNK_SIZE;
            int result = 0;
            //
            // Keep reading until the input buffer is full or the stream ends.
            //
            do {
                try {
                    result = fInputStream.read(fOverflow, fOverflowEnd, capacity);
                } catch (java.io.IOException ex) {
                    // A failed read is handled the same way as end of stream.
                    result = -1;
                }
                if (result == -1) {
                    //
                    // We have reached the end of the stream.
                    //
                    fInputStream.close();
                    fInputStream = null;
                    if (fMostRecentData == null) {
                        //
                        // There is no previous output data, so we know that all of the
                        // new input data will fit.
                        //
                        fMostRecentData = recycledData;
                        if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd)
                            fMostRecentData = new char[1 + fOverflowEnd];
                        else
                            recycledData = null;
                        copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                        fOverflow = null;
                        fMostRecentData[fOutputOffset] = 0;
                    } else {
                        //
                        // Copy the input data to the end of the output buffer.
                        //
                        boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                        if (alldone) {
                            //
                            // The room for the nul is decided by the output array, not by
                            // how many bytes were read: end of stream is only detected
                            // while capacity > 0, so fOverflowEnd is always less than
                            // CHUNK_SIZE here and cannot be used for this test.
                            //
                            if (fOutputOffset >= fMostRecentData.length) {
                                //
                                // Special case - everything fit into the output buffer,
                                // except that there is no room for the nul char we use to
                                // indicate EOF.  Set the overflow buffer length to zero.
                                // On the next call to this method, we will detect this
                                // case, which is handled above.
                                //
                                fCheckOverflow = true;
                                fOverflowOffset = 0;
                                fOverflowEnd = 0;
                            } else {
                                //
                                // It all fit into the output buffer.
                                //
                                fOverflow = null;
                                fMostRecentData[fOutputOffset] = 0;
                            }
                        } else {
                            //
                            // There is still input data left over, save the remaining data as
                            // the overflow buffer for the next call.
                            //
                            fCheckOverflow = true;
                        }
                    }
                    break;
                }
                if (result > 0) {
                    fOverflowEnd += result;
                    capacity -= result;
                }
            } while (capacity > 0);
            //
            // End of stream was fully handled inside the read loop.
            //
            if (result == -1)
                break;
            if (fMostRecentData != null) {
                // Append to the output that is already partly filled.
                boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
                    //
                    // We filled the output buffer.
                    //
                    if (!alldone) {
                        //
                        // The input buffer will become the next overflow buffer.
                        //
                        fCheckOverflow = true;
                    }
                    break;
                }
            } else {
                //
                // Now normalize the end-of-line characters and see if we need to read more
                // bytes to fill up the buffer.
                //
                fMostRecentData = recycledData;
                if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
                    fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
                else
                    recycledData = null;
                copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
                    //
                    // The output buffer is full.  We can return now.
                    //
                    break;
                }
            }
            //
            // We will need to get another input buffer to be able to fill the
            // output buffer completely.
            //
        }
        //
        // Update our instance variables
        //
        fLength += fOutputOffset;
        fCurrentIndex = 0;
        fCurrentChunk.setCharArray(fMostRecentData);
        return (fMostRecentChar = fMostRecentData[0]);
    }
    //
    // Copy and normalize bytes from the overflow buffer into chars in our data buffer.
    //
    /**
     * Decodes UTF-8 bytes from {@code in} into chars in {@code out}, turning
     * CR and CR LF into a single LF on the way.
     *
     * Input is consumed from {@code inOffset} up to fOverflowEnd (the field,
     * not a parameter) and output is written from {@code outOffset} up to
     * {@code out.length}.  Before returning, the positions reached are stored
     * in fOverflowOffset and fOutputOffset, except on the immediate return
     * taken when there is no input at all.
     *
     * State that spans two input buffers is carried in fSkipLinefeed (a CR
     * ended the previous buffer) and fPartialMultiByteIn (a multi-byte
     * sequence was cut off by the end of the previous buffer).
     *
     * @param in        input byte buffer
     * @param inOffset  index of the first byte to process
     * @param out       output char buffer
     * @param outOffset index at which the first char is stored
     * @return true if all input up to fOverflowEnd was consumed, false if
     *         input remains
     * @throws Exception propagated from the multi-byte handlers
     */
    private boolean copyNormalize(byte[] in, int inOffset, char[] out, int outOffset) throws Exception {
        //
        // Handle all edge cases before dropping into the inner loop.
        //
        int inEnd = fOverflowEnd;
        int outEnd = out.length;
        if (inOffset == inEnd)
            return true;
        byte b = in[inOffset];
        if (fSkipLinefeed) {
            // The previous buffer ended with CR (already emitted as LF); an LF
            // at the start of this buffer is the second half of a CR LF pair.
            fSkipLinefeed = false;
            if (b == 0x0A) {
                if (++inOffset == inEnd)
                    return exitNormalize(inOffset, outOffset, true);
                b = in[inOffset];
            }
        } else if (fPartialMultiByteIn > 0) {
            // Finish the multi-byte sequence that was cut off by the end of the
            // previous buffer.  On a false return the handler has already stored
            // the value to hand back in fPartialMultiByteResult.
            if (!handlePartialMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
                return fPartialMultiByteResult;
            // The positions reached are read back from the fields.
            inOffset = fOverflowOffset;
            outOffset = fOutputOffset;
            b = in[inOffset];
        }
        while (outOffset < outEnd) {
            //
            // Find the longest run that we can guarantee will not exceed the
            // bounds of the outer loop.
            //
            int inCount = inEnd - inOffset;
            int outCount = outEnd - outOffset;
            if (inCount > outCount)
                inCount = outCount;
            // From here on inOffset is the index of the byte AFTER b.
            inOffset++;
            while (true) {
                // Bytes that need special treatment: CR, and anything with the
                // high bit set (negative as a signed byte), i.e. non-ASCII.
                while (b == 0x0D || b < 0) {
                    if (b == 0x0D) {
                        // CR always becomes LF ...
                        out[outOffset++] = 0x0A;
                        if (inOffset == inEnd) {
                            // ... and if it was the last byte, remember to drop
                            // an LF that may open the next buffer.
                            fSkipLinefeed = true;
                            return exitNormalize(inOffset, outOffset, true);
                        }
                        b = in[inOffset];
                        if (b == 0x0A) {
                            // CR LF: skip the LF, it was covered by the LF above.
                            if (++inOffset == inEnd)
                                return exitNormalize(inOffset, outOffset, true);
                            b = in[inOffset];
                        }
                        if (outOffset == outEnd)
                            return exitNormalize(inOffset, outOffset, false);
                    } else {
                        // Lead byte of a multi-byte sequence.  On a false return
                        // the handler has stored the value to hand back in
                        // fPartialMultiByteResult.
                        if (!handleMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
                            return fPartialMultiByteResult;
                        inOffset = fOverflowOffset;
                        outOffset = fOutputOffset;
                        b = in[inOffset];
                    }
                    // Recompute the safe run length from the new positions.
                    inCount = inEnd - inOffset;
                    outCount = outEnd - outOffset;
                    if (inCount > outCount)
                        inCount = outCount;
                    inOffset++;
                }
                // Fast path: copy plain ASCII bytes until the run is used up or
                // a byte needing special treatment shows up.
                while (true) {
                    out[outOffset++] = (char)b;
                    if (--inCount == 0)
                        break;
                    b = in[inOffset++];
                    if (b == 0x0D || b < 0)
                        break;
                }
                if (inCount == 0)
                    break;
            }
            if (inOffset == inEnd)
                break;
        }
        return exitNormalize(inOffset, outOffset, inOffset == inEnd);
    }
    //
    //
    //
    /**
     * Records the input and output positions reached by the decoder in
     * fOverflowOffset and fOutputOffset and passes {@code result} through,
     * so a caller can save its state and return in a single statement.
     *
     * @param inOffset  position reached in the input byte buffer
     * @param outOffset position reached in the output char buffer
     * @param result    value to hand back unchanged
     * @return {@code result}
     */
    private boolean exitNormalize(int inOffset, int outOffset, boolean result) {
        this.fOutputOffset = outOffset;
        this.fOverflowOffset = inOffset;
        return result;
    }
    //
    //
    //
    /**
     * Remembers that {@code inCount} bytes of an unfinished multi-byte
     * sequence have been seen and stores the last three of them.  The bytes
     * are passed newest first: {@code bz} goes to index inCount-1,
     * {@code by} to inCount-2 and {@code bx} to inCount-3.
     */
    private void savePartialMultiByte(int inCount, byte bz, byte by, byte bx) {
        fPartialMultiByteIn = inCount;
        final byte[] saved = fPartialMultiByteChar;
        saved[inCount - 1] = bz;
        saved[inCount - 2] = by;
        saved[inCount - 3] = bx;
    }
    /**
     * Two-byte variant: stores {@code bz} at index inCount-1 and {@code by}
     * at inCount-2.  Slots below that keep whatever they held before.
     */
    private void savePartialMultiByte(int inCount, byte bz, byte by) {
        fPartialMultiByteIn = inCount;
        final byte[] saved = fPartialMultiByteChar;
        saved[inCount - 1] = bz;
        saved[inCount - 2] = by;
    }
    /**
     * One-byte variant: stores {@code bz} at index inCount-1.  Slots below
     * that keep whatever they held before.
     */
    private void savePartialMultiByte(int inCount, byte bz) {
        fPartialMultiByteIn = inCount;
        final byte[] saved = fPartialMultiByteChar;
        saved[inCount - 1] = bz;
    }
    /**
     * Decodes one multi-byte UTF-8 sequence whose lead byte {@code b} has
     * already been read by copyNormalize().
     *
     * If the sequence is cut off by the end of the input, the bytes seen so
     * far are saved with savePartialMultiByte() so that
     * handlePartialMultiByteChar() can finish it with the next buffer.  A
     * malformed sequence is reported through deferException() and a nul char
     * is stored in its place; the offending byte is consumed.
     *
     * NOTE(review): when the high surrogate of a pair fills the output, the
     * low surrogate is stored in fPartialSurrogatePair but fPartialMultiByteIn
     * is not set to 4, the value handlePartialMultiByteChar() tests for, so as
     * written the low surrogate is never emitted.  Fixing that needs
     * coordinated changes in handlePartialMultiByteChar(), copyNormalize() and
     * the end-of-stream buffer sizing in fillCurrentChunk(), so it is left
     * unchanged here.
     *
     * @param b         lead byte of the sequence
     * @param in        input byte buffer
     * @param inOffset  index of the byte that follows {@code b}
     * @param inEnd     end of the valid input
     * @param out       output char buffer
     * @param outOffset index where the next char is stored; must be less
     *                  than {@code outEnd}
     * @param outEnd    end of the output buffer
     * @return true if the caller may go on decoding at fOverflowOffset /
     *         fOutputOffset (both are then inside their buffers); false if
     *         copyNormalize() has to return fPartialMultiByteResult
     * @throws Exception propagated from deferException()
     */
    private boolean handleMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
        if (inOffset == inEnd) {
            // Only the lead byte is available.
            savePartialMultiByte(1, b);
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
            return false;
        }
        byte b1 = in[inOffset++];
        if ((b1 & 0xc0) != 0x80) {
            Object[] args = {
                Integer.toHexString(b & 0xff),
                Integer.toHexString(b1 & 0xff)
            };
            deferException(ImplementationMessages.ENC5, args, outOffset);
            out[outOffset++] = 0;
            // The caller reads in[fOverflowOffset] after a true return, so the
            // error path needs the same exhaustion check as the success paths.
            return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
        }
        if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
            int ch = ((0x1f & b)<<6) + (0x3f & b1);
            out[outOffset++] = (char)ch;
            return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
        }
        if (inOffset == inEnd) {
            // Lead byte and first continuation byte are available.
            savePartialMultiByte(2, b1, b);
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
            return false;
        }
        byte b2 = in[inOffset++];
        if ((b2 & 0xc0) != 0x80) {
            Object[] args = {
                Integer.toHexString(b & 0xff),
                Integer.toHexString(b1 & 0xff),
                Integer.toHexString(b2 & 0xff)
            };
            deferException(ImplementationMessages.ENC6, args, outOffset);
            out[outOffset++] = 0;
            return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
        }
        if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
            int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
            out[outOffset++] = (char)ch;
            return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
        }
        if ((b & 0xf8) != 0xf0) {
            // Not a valid lead byte for a two, three or four byte sequence.
            Object[] args = { Integer.toHexString(b & 0xff) };
            deferException(ImplementationMessages.ENC4, args, outOffset);
            out[outOffset++] = 0;
            return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
        }
        if (inOffset == inEnd) {
            // Three of the four bytes are available.
            savePartialMultiByte(3, b2, b1, b);
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
            return false;
        }
        byte b3 = in[inOffset++];
        if ((b3 & 0xc0) != 0x80) {
            Object[] args = {
                Integer.toHexString(b & 0xff),
                Integer.toHexString(b1 & 0xff),
                Integer.toHexString(b2 & 0xff),
                Integer.toHexString(b3 & 0xff)
            };
            deferException(ImplementationMessages.ENC7, args, outOffset);
            out[outOffset++] = 0;
            return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
        }
        // 11110www 10zzzzzz 10yyyyyy 10xxxxxx
        int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
        if (ch >= 0x10000) {
            // Outside the BMP: emit a surrogate pair, high surrogate first.
            out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
            ch = (((ch-0x00010000)&0x3ff)+0xdc00);
            if (outOffset == outEnd) {
                // See NOTE(review) in the method comment.
                fPartialSurrogatePair = ch;
                fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                return false;
            }
        }
        out[outOffset++] = (char)ch;
        return finishMultiByteChar(inOffset, inEnd, outOffset, outEnd);
    }
    /**
     * Stores the positions reached after a multi-byte sequence and reports
     * whether copyNormalize() may keep decoding.  If either buffer is
     * exhausted the value for copyNormalize() to return ("all input
     * consumed") is left in fPartialMultiByteResult and false is returned.
     */
    private boolean finishMultiByteChar(int inOffset, int inEnd, int outOffset, int outEnd) {
        if (inOffset == inEnd || outOffset == outEnd) {
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
            return false;
        }
        return exitNormalize(inOffset, outOffset, true);
    }
    /**
     * Finishes a multi-byte UTF-8 sequence whose first fPartialMultiByteIn
     * bytes were saved in fPartialMultiByteChar because the previous input
     * buffer ended in the middle of it.
     *
     * Unlike in handleMultiByteChar(), {@code inOffset} is the index of
     * {@code b} itself; {@code b} is the first byte of the new buffer and has
     * not been consumed yet.  Once a byte has been used to complete a
     * character the offset is advanced past it before the method leaves, on
     * every exit path, so the byte is never decoded a second time.  A byte
     * that makes the sequence malformed is reported through deferException()
     * and is left unconsumed.
     *
     * NOTE(review): nothing in this class sets fPartialMultiByteIn to 4, so
     * the "pending low surrogate" branch below is never entered and a low
     * surrogate stored in fPartialSurrogatePair is not emitted.  A fix needs
     * coordinated changes in handleMultiByteChar(), copyNormalize() and the
     * end-of-stream buffer sizing in fillCurrentChunk(), so the behavior is
     * left unchanged here.
     *
     * @param b         the current, not yet consumed input byte
     * @param in        input byte buffer
     * @param inOffset  index of {@code b} in {@code in}
     * @param inEnd     end of the valid input
     * @param out       output char buffer
     * @param outOffset index where the next char is stored
     * @param outEnd    end of the output buffer
     * @return true if the caller may go on decoding at fOverflowOffset /
     *         fOutputOffset; false if copyNormalize() has to return
     *         fPartialMultiByteResult
     * @throws Exception propagated from deferException()
     */
    private boolean handlePartialMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
        if (outOffset == outEnd) {
            // No room at all; leave the saved state untouched for the next call.
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
            return false;
        }
        if (fPartialMultiByteIn == 4) {
            // Pending low surrogate - see NOTE(review) in the method comment.
            out[outOffset++] = (char)fPartialSurrogatePair;
            if (outOffset == outEnd) {
                fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                return false;
            }
            fOutputOffset = outOffset;
            return true;
        }
        int byteIn = fPartialMultiByteIn;
        fPartialMultiByteIn = 0;
        //
        // Rebuild the sequence: the current byte takes the first free position,
        // the saved bytes fill the positions before it, and b becomes the lead byte.
        //
        byte b1 = 0;
        byte b2 = 0;
        byte b3 = 0;
        switch (byteIn) {
        case 1: b1 = b; break;
        case 2: b2 = b; break;
        case 3: b3 = b; break;
        }
        int i = byteIn;
        switch (byteIn) {
        case 3:
            b2 = fPartialMultiByteChar[--i];
        case 2:
            b1 = fPartialMultiByteChar[--i];
        case 1:
            b = fPartialMultiByteChar[--i];
        }
        //
        // Resume decoding at the point where the previous buffer ran out.
        // Throughout, inOffset is the index of the most recently fetched byte.
        //
        switch (byteIn) {
        case 1:
            if ((b1 & 0xc0) != 0x80) {
                // Bytes are masked so that they print as two hex digits, the
                // same way handleMultiByteChar() reports them.
                Object[] args = {
                    Integer.toHexString(b & 0xff),
                    Integer.toHexString(b1 & 0xff)
                };
                deferException(ImplementationMessages.ENC5, args, outOffset);
                out[outOffset++] = 0;
                break;
            }
            // fall through
        case 2:
            if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
                int ch = ((0x1f & b)<<6) + (0x3f & b1);
                out[outOffset++] = (char)ch;
                // b1 came from the current buffer and is now used up.
                if (byteIn < 2)
                    inOffset++;
                return finishPartialMultiByteChar(inOffset, inEnd, outOffset, outEnd);
            }
            if (byteIn < 2) {
                if (++inOffset == inEnd) {
                    // Still incomplete: the lead byte is already in slot 0.
                    savePartialMultiByte(2, b1);
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                    return false;
                }
                b2 = in[inOffset];
            }
            if ((b2 & 0xc0) != 0x80) {
                Object[] args = {
                    Integer.toHexString(b & 0xff),
                    Integer.toHexString(b1 & 0xff),
                    Integer.toHexString(b2 & 0xff)
                };
                deferException(ImplementationMessages.ENC6, args, outOffset);
                out[outOffset++] = 0;
                break;
            }
            // fall through
        case 3:
            if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
                int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
                out[outOffset++] = (char)ch;
                // b2 came from the current buffer and is now used up.
                if (byteIn < 3)
                    inOffset++;
                return finishPartialMultiByteChar(inOffset, inEnd, outOffset, outEnd);
            }
            if (byteIn < 3) {
                if ((b & 0xf8) != 0xf0) {
                    // Not a valid lead byte for a two, three or four byte sequence.
                    Object[] args = { Integer.toHexString(b & 0xff) };
                    deferException(ImplementationMessages.ENC4, args, outOffset);
                    out[outOffset++] = 0;
                    break;
                }
                if (++inOffset == inEnd) {
                    // Still incomplete: the lead byte is already in slot 0.
                    savePartialMultiByte(3, b2, b1);
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                    return false;
                }
                b3 = in[inOffset];
            }
            if ((b3 & 0xc0) != 0x80) {
                Object[] args = {
                    Integer.toHexString(b & 0xff),
                    Integer.toHexString(b1 & 0xff),
                    Integer.toHexString(b2 & 0xff),
                    Integer.toHexString(b3 & 0xff)
                };
                deferException(ImplementationMessages.ENC7, args, outOffset);
                out[outOffset++] = 0;
                break;
            }
            // 11110www 10zzzzzz 10yyyyyy 10xxxxxx
            int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
            // b3 is used up from here on, whichever way the method is left.
            inOffset++;
            if (ch >= 0x10000) {
                // Outside the BMP: emit a surrogate pair, high surrogate first.
                out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
                ch = (((ch-0x00010000)&0x3ff)+0xdc00);
                if (outOffset == outEnd) {
                    // See NOTE(review) in the method comment.
                    fPartialSurrogatePair = ch;
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                    return false;
                }
            }
            out[outOffset++] = (char)ch;
            return finishPartialMultiByteChar(inOffset, inEnd, outOffset, outEnd);
        }
        // Reached after a malformed sequence: the offending byte is still at
        // inOffset and will be looked at again by the caller.
        return exitNormalize(inOffset, outOffset, true);
    }
    /**
     * Stores the positions reached after a completed character and reports
     * whether copyNormalize() may keep decoding.  {@code inOffset} must
     * already point past the last byte that was used.  A full output buffer
     * is reported to copyNormalize() as "input remains" (false), an exhausted
     * input as "all input consumed" (true).
     */
    private boolean finishPartialMultiByteChar(int inOffset, int inEnd, int outOffset, int outEnd) {
        if (outOffset == outEnd) {
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
            return false;
        }
        if (inOffset == inEnd) {
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
            return false;
        }
        return exitNormalize(inOffset, outOffset, true);
    }
}