001/*
002 * Copyright (c) 2006-2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the Eclipse Public License v1.0
006 * which accompanies this distribution, and is available at
007 * http://www.eclipse.org/legal/epl-v10.html
008 *
009 * Contributors:
010 *     Bogdan Stefanescu
011 *     Thierry Delprat
012 *     Florent Guillaume
013 */
014package org.nuxeo.ecm.core.io.impl.plugins;
015
016import java.io.File;
017import java.io.IOException;
018import java.io.InputStream;
019import java.util.ArrayList;
020import java.util.Collection;
021import java.util.Collections;
022import java.util.Comparator;
023import java.util.Enumeration;
024import java.util.List;
025import java.util.zip.ZipEntry;
026import java.util.zip.ZipFile;
027import java.util.zip.ZipInputStream;
028
029import org.apache.commons.io.FilenameUtils;
030import org.apache.commons.io.IOUtils;
031import org.apache.commons.io.input.CloseShieldInputStream;
032import org.dom4j.Document;
033import org.dom4j.DocumentException;
034import org.dom4j.io.SAXReader;
035import org.nuxeo.common.utils.Path;
036import org.nuxeo.ecm.core.api.Blob;
037import org.nuxeo.ecm.core.api.Blobs;
038import org.nuxeo.ecm.core.api.impl.blob.ZipEntryBlob;
039import org.nuxeo.ecm.core.io.ExportConstants;
040import org.nuxeo.ecm.core.io.ExportedDocument;
041import org.nuxeo.ecm.core.io.impl.AbstractDocumentReader;
042import org.nuxeo.ecm.core.io.impl.DWord;
043import org.nuxeo.ecm.core.io.impl.ExportedDocumentImpl;
044
045/**
046 * Reads nuxeo archives generated using {@link NuxeoArchiveWriter}.
047 * <p>
048 * If you need to read a CoreIO XML Archive that was not directly generated by {@link NuxeoArchiveWriter} or that was
049 * modified you need to use the NuxeoArchiveReader(File) constructor.
050 * <p>
051 * This implementation holds either a {@link ZipInputStream}, in which case it is assumed to have been generated by
052 * {@link NuxeoArchiveWriter} and has a special format (TODO DOCUMENT), or it holds a {@link ZipFile} that may be used
053 * in a random-access manner.
054 */
055public class NuxeoArchiveReader extends AbstractDocumentReader {
056
057    private ZipInputStream in;
058
059    private boolean inMustBeClosed;
060
061    private ZipFile zipFile;
062
063    private List<String> zipIndex;
064
065    private final Collection<File> filesToDelete = new ArrayList<File>();
066
067    /**
068     * Create a {@link NuxeoArchiveReader} from an {@link InputStream}.
069     * <p>
070     * The InputStream must point to an archive that was generated by {@link NuxeoArchiveWriter}.
071     *
072     * @param in InputStream pointing an archive that was generated by NuxeoArchiveWriter
073     * @throws IOException
074     */
075    public NuxeoArchiveReader(InputStream in) throws IOException {
076        this(new ZipInputStream(in), true);
077    }
078
079    /**
080     * Protected constructor used by {@link ZipReader}. Must not close the stream when done.
081     */
082    protected NuxeoArchiveReader(ZipInputStream in, boolean checkMarker) throws IOException {
083        this.in = in;
084        inMustBeClosed = !checkMarker;
085        if (checkMarker) {
086            checkMarker();
087        }
088    }
089
090    /**
091     * Create a {@link NuxeoArchiveReader} from a {@link File}.
092     * <p>
093     * This constructor is different from others because it allows the input zip file to have been generated by an other
094     * engine that {@link NuxeoArchiveWriter}.
095     * <p>
096     * In particular, you can use this constructor on a Zip Archive that was manually modified.
097     *
098     * @param file a Zip archive
099     */
100    public NuxeoArchiveReader(File file) throws IOException {
101        this.zipFile = new ZipFile(file);
102        buildOrderedZipIndex();
103        checkMarker();
104    }
105
106    protected void buildOrderedZipIndex() {
107        zipIndex = new ArrayList<String>();
108        Enumeration<? extends ZipEntry> entries = zipFile.entries();
109
110        while (entries.hasMoreElements()) {
111            ZipEntry entry = entries.nextElement();
112            zipIndex.add(entry.getName());
113        }
114        Collections.sort(zipIndex, new Comparator<String>() {
115            @Override
116            public int compare(String spath1, String spath2) {
117                return spath1.compareTo(spath2);
118            }
119        });
120    }
121
122    @Override
123    public ExportedDocument read() throws IOException {
124        if (zipFile != null) {
125            return readZip();
126        } else {
127            return readOrderedStream();
128        }
129    }
130
131    protected ExportedDocument readZip() throws IOException {
132
133        if (zipIndex.size() == 0) {
134            return null;
135        }
136        String idxname = zipIndex.remove(0);
137        ZipEntry entry = zipFile.getEntry(idxname);
138        if (entry == null) {
139            return null;
140        }
141
142        if (!entry.isDirectory()) {
143            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
144                return read();
145            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
146                // the repository ROOT! TODO: how to handle root? it doesn't
147                // have a dir ..
148                ExportedDocument xdoc = new ExportedDocumentImpl();
149                xdoc.setPath(new Path("/"));
150                xdoc.setDocument(loadXML(entry));
151                return xdoc;
152            } else {
153                throw new IOException("Invalid Nuxeo archive on entry " + entry.getName());
154            }
155        }
156
157        // find the direct children entry that are part of the same document
158        // since archive is modifiable we can not rely on the Extra bits thing
159        List<String> childEntries = new ArrayList<String>();
160        int depth = new Path(idxname).removeTrailingSeparator().segmentCount();
161        for (String path : zipIndex) {
162            if (path.startsWith(idxname)) {
163                int subdepth = new Path(path).removeTrailingSeparator().segmentCount();
164                if (subdepth != depth + 1 || zipFile.getEntry(path).isDirectory()) {
165                    continue;
166                }
167                childEntries.add(path);
168            } else {
169                break;
170            }
171        }
172
173        if (childEntries.size() == 0) {
174            return read(); // empty dir -> try next directory
175        }
176        String name = entry.getName();
177        ExportedDocument xdoc = new ExportedDocumentImpl();
178        xdoc.setPath(new Path(name).removeTrailingSeparator());
179        for (String childEntryName : childEntries) {
180            int i = zipIndex.indexOf(childEntryName);
181            idxname = zipIndex.remove(i);
182            entry = zipFile.getEntry(idxname);
183            name = entry.getName();
184            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
185                xdoc.setDocument(loadXML(entry));
186            } else if (name.endsWith(".xml")) { // external doc file
187                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
188            } else { // should be a blob
189                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
190            }
191        }
192        return xdoc;
193    }
194
195    protected ExportedDocument readOrderedStream() throws IOException {
196        ZipEntry entry = in.getNextEntry();
197        if (entry == null) {
198            return null;
199        }
200        if (!entry.isDirectory()) {
201            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
202                return read();
203            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
204                // the repository ROOT! TODO: how to handle root? it doesn't
205                // have a dir ..
206                ExportedDocument xdoc = new ExportedDocumentImpl();
207                xdoc.setPath(new Path("/"));
208                xdoc.setDocument(loadXML(entry));
209                return xdoc;
210            } else {
211                throw new IOException("Invalid Nuxeo archive");
212            }
213        }
214        int count = getFilesCount(entry);
215        if (count == 0) {
216            return read(); // empty dir -> try next directory
217        }
218        String name = entry.getName();
219        ExportedDocument xdoc = new ExportedDocumentImpl();
220        xdoc.setPath(new Path(name).removeTrailingSeparator());
221        for (int i = 0; i < count; i++) {
222            entry = in.getNextEntry();
223            name = entry.getName();
224            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
225                xdoc.setDocument(loadXML(entry));
226            } else if (name.endsWith(".xml")) { // external doc file
227                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
228            } else { // should be a blob
229                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
230            }
231        }
232        return xdoc;
233    }
234
235    @Override
236    public void close() {
237        IOUtils.closeQuietly(zipFile);
238        if (inMustBeClosed) {
239            IOUtils.closeQuietly(in);
240        }
241        for (File file : filesToDelete) {
242            file.delete();
243        }
244    }
245
246    private static int getFilesCount(ZipEntry entry) throws IOException {
247        byte[] bytes = entry.getExtra();
248        if (bytes == null) {
249            return 0;
250        } else if (bytes.length != 4) {
251            throw new IOException("Invalid Nuxeo Archive");
252        } else {
253            return new DWord(bytes).getInt();
254        }
255    }
256
257    private Document loadXML(ZipEntry entry) throws IOException {
258        try {
259            SAXReader saxReader = new SAXReader();
260            if (zipFile != null) {
261                try (InputStream stream = zipFile.getInputStream(entry)) {
262                    return saxReader.read(stream);
263                }
264            } else {
265                // SAXReader.read always closes the stream, but we don't want that
266                // so wrap it in a CloseShieldInputStream
267                try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
268                    return saxReader.read(ncin);
269                }
270            }
271        } catch (DocumentException e) {
272            throw new IOException("Failed to read zip entry: " + entry.getName(), e);
273        }
274    }
275
276    private Blob createBlob(ZipEntry entry) throws IOException {
277        if (zipFile != null) {
278            return new ZipEntryBlob(zipFile, entry);
279        } else {
280            // should decompress since this is a generic stream
281            // FileBlob always closes the stream, but we don't want that
282            // so wrap it in a CloseShieldInputStream
283            try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
284                Blob blob = Blobs.createBlob(ncin);
285                filesToDelete.add(blob.getFile());
286                return blob;
287            }
288        }
289    }
290
291    private void checkMarker() throws IOException {
292
293        if (zipFile == null) {
294            ZipEntry entry = in.getNextEntry();
295            if (entry == null) {
296                throw new IOException("Not a valid Nuxeo Archive - no marker file found (unexpected end of zip)");
297            }
298            if (!isMarkerEntry(entry)) {
299                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
300            }
301        } else {
302            if (!zipIndex.contains(ExportConstants.MARKER_FILE)) {
303                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
304            }
305        }
306    }
307
308    public static boolean isMarkerEntry(ZipEntry entry) {
309        return entry.getName().equals(ExportConstants.MARKER_FILE);
310    }
311
312}