001/*
002 * (C) Copyright 2006-2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Bogdan Stefanescu
018 *     Thierry Delprat
019 *     Florent Guillaume
020 */
021package org.nuxeo.ecm.core.io.impl.plugins;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.Comparator;
029import java.util.Enumeration;
030import java.util.List;
031import java.util.zip.ZipEntry;
032import java.util.zip.ZipFile;
033import java.util.zip.ZipInputStream;
034
035import org.apache.commons.io.FilenameUtils;
036import org.apache.commons.io.IOUtils;
037import org.apache.commons.io.input.CloseShieldInputStream;
038import org.dom4j.Document;
039import org.dom4j.DocumentException;
040import org.dom4j.io.SAXReader;
041import org.nuxeo.common.utils.Path;
042import org.nuxeo.ecm.core.api.Blob;
043import org.nuxeo.ecm.core.api.Blobs;
044import org.nuxeo.ecm.core.api.impl.blob.ZipEntryBlob;
045import org.nuxeo.ecm.core.io.ExportConstants;
046import org.nuxeo.ecm.core.io.ExportedDocument;
047import org.nuxeo.ecm.core.io.impl.AbstractDocumentReader;
048import org.nuxeo.ecm.core.io.impl.DWord;
049import org.nuxeo.ecm.core.io.impl.ExportedDocumentImpl;
050
051/**
052 * Reads nuxeo archives generated using {@link NuxeoArchiveWriter}.
053 * <p>
054 * If you need to read a CoreIO XML Archive that was not directly generated by {@link NuxeoArchiveWriter} or that was
055 * modified you need to use the NuxeoArchiveReader(File) constructor.
056 * <p>
057 * This implementation holds either a {@link ZipInputStream}, in which case it is assumed to have been generated by
058 * {@link NuxeoArchiveWriter} and has a special format (TODO DOCUMENT), or it holds a {@link ZipFile} that may be used
059 * in a random-access manner.
060 */
061public class NuxeoArchiveReader extends AbstractDocumentReader {
062
063    private ZipInputStream in;
064
065    private boolean inMustBeClosed;
066
067    private ZipFile zipFile;
068
069    private List<String> zipIndex;
070
071    private final Collection<File> filesToDelete = new ArrayList<>();
072
073    /**
074     * Create a {@link NuxeoArchiveReader} from an {@link InputStream}.
075     * <p>
076     * The InputStream must point to an archive that was generated by {@link NuxeoArchiveWriter}.
077     *
078     * @param in InputStream pointing an archive that was generated by NuxeoArchiveWriter
079     */
080    public NuxeoArchiveReader(InputStream in) throws IOException {
081        this(new ZipInputStream(in), true);
082    }
083
084    /**
085     * Protected constructor used by {@link ZipReader}. Must not close the stream when done.
086     */
087    protected NuxeoArchiveReader(ZipInputStream in, boolean checkMarker) throws IOException {
088        this.in = in;
089        inMustBeClosed = !checkMarker;
090        if (checkMarker) {
091            checkMarker();
092        }
093    }
094
095    /**
096     * Create a {@link NuxeoArchiveReader} from a {@link File}.
097     * <p>
098     * This constructor is different from others because it allows the input zip file to have been generated by an other
099     * engine that {@link NuxeoArchiveWriter}.
100     * <p>
101     * In particular, you can use this constructor on a Zip Archive that was manually modified.
102     *
103     * @param file a Zip archive
104     */
105    public NuxeoArchiveReader(File file) throws IOException {
106        this.zipFile = new ZipFile(file);
107        buildOrderedZipIndex();
108        checkMarker();
109    }
110
111    protected void buildOrderedZipIndex() {
112        zipIndex = new ArrayList<>();
113        Enumeration<? extends ZipEntry> entries = zipFile.entries();
114
115        while (entries.hasMoreElements()) {
116            ZipEntry entry = entries.nextElement();
117            zipIndex.add(entry.getName());
118        }
119        zipIndex.sort(Comparator.naturalOrder());
120    }
121
122    @Override
123    public ExportedDocument read() throws IOException {
124        if (zipFile != null) {
125            return readZip();
126        } else {
127            return readOrderedStream();
128        }
129    }
130
131    protected ExportedDocument readZip() throws IOException {
132
133        if (zipIndex.size() == 0) {
134            return null;
135        }
136        String idxname = zipIndex.remove(0);
137        ZipEntry entry = zipFile.getEntry(idxname);
138        if (entry == null) {
139            return null;
140        }
141
142        if (!entry.isDirectory()) {
143            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
144                return read();
145            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
146                // the repository ROOT! TODO: how to handle root? it doesn't
147                // have a dir ..
148                ExportedDocument xdoc = new ExportedDocumentImpl();
149                xdoc.setPath(new Path("/"));
150                xdoc.setDocument(loadXML(entry));
151                return xdoc;
152            } else {
153                throw new IOException("Invalid Nuxeo archive on entry " + entry.getName());
154            }
155        }
156
157        // find the direct children entry that are part of the same document
158        // since archive is modifiable we can not rely on the Extra bits thing
159        List<String> childEntries = new ArrayList<>();
160        int depth = new Path(idxname).removeTrailingSeparator().segmentCount();
161        for (String path : zipIndex) {
162            if (path.startsWith(idxname)) {
163                int subdepth = new Path(path).removeTrailingSeparator().segmentCount();
164                if (subdepth != depth + 1 || zipFile.getEntry(path).isDirectory()) {
165                    continue;
166                }
167                childEntries.add(path);
168            } else {
169                break;
170            }
171        }
172
173        if (childEntries.size() == 0) {
174            return read(); // empty dir -> try next directory
175        }
176        String name = entry.getName();
177        ExportedDocument xdoc = new ExportedDocumentImpl();
178        xdoc.setPath(new Path(name).removeTrailingSeparator());
179        for (String childEntryName : childEntries) {
180            int i = zipIndex.indexOf(childEntryName);
181            idxname = zipIndex.remove(i);
182            entry = zipFile.getEntry(idxname);
183            name = entry.getName();
184            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
185                xdoc.setDocument(loadXML(entry));
186            } else if (name.endsWith(".xml")) { // external doc file
187                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
188            } else { // should be a blob
189                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
190            }
191        }
192        return xdoc;
193    }
194
195    protected ExportedDocument readOrderedStream() throws IOException {
196        ZipEntry entry = in.getNextEntry();
197        if (entry == null) {
198            return null;
199        }
200        if (!entry.isDirectory()) {
201            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
202                return read();
203            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
204                // the repository ROOT! TODO: how to handle root? it doesn't
205                // have a dir ..
206                ExportedDocument xdoc = new ExportedDocumentImpl();
207                xdoc.setPath(new Path("/"));
208                xdoc.setDocument(loadXML(entry));
209                return xdoc;
210            } else {
211                throw new IOException("Invalid Nuxeo archive");
212            }
213        }
214        int count = getFilesCount(entry);
215        if (count == 0) {
216            return read(); // empty dir -> try next directory
217        }
218        String name = entry.getName();
219        ExportedDocument xdoc = new ExportedDocumentImpl();
220        xdoc.setPath(new Path(name).removeTrailingSeparator());
221        for (int i = 0; i < count; i++) {
222            entry = in.getNextEntry();
223            name = entry.getName();
224            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
225                xdoc.setDocument(loadXML(entry));
226            } else if (name.endsWith(".xml")) { // external doc file
227                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
228            } else { // should be a blob
229                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
230            }
231        }
232        return xdoc;
233    }
234
235    @Override
236    public void close() {
237        IOUtils.closeQuietly(zipFile);
238        if (inMustBeClosed) {
239            IOUtils.closeQuietly(in);
240        }
241        for (File file : filesToDelete) {
242            file.delete();
243        }
244    }
245
246    private static int getFilesCount(ZipEntry entry) throws IOException {
247        byte[] bytes = entry.getExtra();
248        if (bytes == null) {
249            return 0;
250        } else if (bytes.length != 4) {
251            throw new IOException("Invalid Nuxeo Archive");
252        } else {
253            return new DWord(bytes).getInt();
254        }
255    }
256
257    private Document loadXML(ZipEntry entry) throws IOException {
258        try {
259            SAXReader saxReader = new SAXReader();
260            if (zipFile != null) {
261                try (InputStream stream = zipFile.getInputStream(entry)) {
262                    return saxReader.read(stream);
263                }
264            } else {
265                // SAXReader.read always closes the stream, but we don't want that
266                // so wrap it in a CloseShieldInputStream
267                try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
268                    return saxReader.read(ncin);
269                }
270            }
271        } catch (DocumentException e) {
272            throw new IOException("Failed to read zip entry: " + entry.getName(), e);
273        }
274    }
275
276    private Blob createBlob(ZipEntry entry) throws IOException {
277        if (zipFile != null) {
278            return new ZipEntryBlob(zipFile, entry);
279        } else {
280            // should decompress since this is a generic stream
281            // FileBlob always closes the stream, but we don't want that
282            // so wrap it in a CloseShieldInputStream
283            try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
284                Blob blob = Blobs.createBlob(ncin);
285                filesToDelete.add(blob.getFile());
286                return blob;
287            }
288        }
289    }
290
291    private void checkMarker() throws IOException {
292
293        if (zipFile == null) {
294            ZipEntry entry = in.getNextEntry();
295            if (entry == null) {
296                throw new IOException("Not a valid Nuxeo Archive - no marker file found (unexpected end of zip)");
297            }
298            if (!isMarkerEntry(entry)) {
299                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
300            }
301        } else {
302            if (!zipIndex.contains(ExportConstants.MARKER_FILE)) {
303                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
304            }
305        }
306    }
307
308    public static boolean isMarkerEntry(ZipEntry entry) {
309        return entry.getName().equals(ExportConstants.MARKER_FILE);
310    }
311
312}