001/*
002 * (C) Copyright 2006-2015 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Bogdan Stefanescu
018 *     Thierry Delprat
019 *     Florent Guillaume
020 */
021package org.nuxeo.ecm.core.io.impl.plugins;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStream;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.Comparator;
030import java.util.Enumeration;
031import java.util.List;
032import java.util.zip.ZipEntry;
033import java.util.zip.ZipFile;
034import java.util.zip.ZipInputStream;
035
036import org.apache.commons.io.FilenameUtils;
037import org.apache.commons.io.IOUtils;
038import org.apache.commons.io.input.CloseShieldInputStream;
039import org.dom4j.Document;
040import org.dom4j.DocumentException;
041import org.dom4j.io.SAXReader;
042import org.nuxeo.common.utils.Path;
043import org.nuxeo.ecm.core.api.Blob;
044import org.nuxeo.ecm.core.api.Blobs;
045import org.nuxeo.ecm.core.api.impl.blob.ZipEntryBlob;
046import org.nuxeo.ecm.core.io.ExportConstants;
047import org.nuxeo.ecm.core.io.ExportedDocument;
048import org.nuxeo.ecm.core.io.impl.AbstractDocumentReader;
049import org.nuxeo.ecm.core.io.impl.DWord;
050import org.nuxeo.ecm.core.io.impl.ExportedDocumentImpl;
051
052/**
053 * Reads nuxeo archives generated using {@link NuxeoArchiveWriter}.
054 * <p>
055 * If you need to read a CoreIO XML Archive that was not directly generated by {@link NuxeoArchiveWriter} or that was
056 * modified you need to use the NuxeoArchiveReader(File) constructor.
057 * <p>
058 * This implementation holds either a {@link ZipInputStream}, in which case it is assumed to have been generated by
059 * {@link NuxeoArchiveWriter} and has a special format (TODO DOCUMENT), or it holds a {@link ZipFile} that may be used
060 * in a random-access manner.
061 */
062public class NuxeoArchiveReader extends AbstractDocumentReader {
063
064    private ZipInputStream in;
065
066    private boolean inMustBeClosed;
067
068    private ZipFile zipFile;
069
070    private List<String> zipIndex;
071
072    private final Collection<File> filesToDelete = new ArrayList<File>();
073
074    /**
075     * Create a {@link NuxeoArchiveReader} from an {@link InputStream}.
076     * <p>
077     * The InputStream must point to an archive that was generated by {@link NuxeoArchiveWriter}.
078     *
079     * @param in InputStream pointing an archive that was generated by NuxeoArchiveWriter
080     * @throws IOException
081     */
082    public NuxeoArchiveReader(InputStream in) throws IOException {
083        this(new ZipInputStream(in), true);
084    }
085
086    /**
087     * Protected constructor used by {@link ZipReader}. Must not close the stream when done.
088     */
089    protected NuxeoArchiveReader(ZipInputStream in, boolean checkMarker) throws IOException {
090        this.in = in;
091        inMustBeClosed = !checkMarker;
092        if (checkMarker) {
093            checkMarker();
094        }
095    }
096
097    /**
098     * Create a {@link NuxeoArchiveReader} from a {@link File}.
099     * <p>
100     * This constructor is different from others because it allows the input zip file to have been generated by an other
101     * engine that {@link NuxeoArchiveWriter}.
102     * <p>
103     * In particular, you can use this constructor on a Zip Archive that was manually modified.
104     *
105     * @param file a Zip archive
106     */
107    public NuxeoArchiveReader(File file) throws IOException {
108        this.zipFile = new ZipFile(file);
109        buildOrderedZipIndex();
110        checkMarker();
111    }
112
113    protected void buildOrderedZipIndex() {
114        zipIndex = new ArrayList<String>();
115        Enumeration<? extends ZipEntry> entries = zipFile.entries();
116
117        while (entries.hasMoreElements()) {
118            ZipEntry entry = entries.nextElement();
119            zipIndex.add(entry.getName());
120        }
121        Collections.sort(zipIndex, new Comparator<String>() {
122            @Override
123            public int compare(String spath1, String spath2) {
124                return spath1.compareTo(spath2);
125            }
126        });
127    }
128
129    @Override
130    public ExportedDocument read() throws IOException {
131        if (zipFile != null) {
132            return readZip();
133        } else {
134            return readOrderedStream();
135        }
136    }
137
138    protected ExportedDocument readZip() throws IOException {
139
140        if (zipIndex.size() == 0) {
141            return null;
142        }
143        String idxname = zipIndex.remove(0);
144        ZipEntry entry = zipFile.getEntry(idxname);
145        if (entry == null) {
146            return null;
147        }
148
149        if (!entry.isDirectory()) {
150            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
151                return read();
152            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
153                // the repository ROOT! TODO: how to handle root? it doesn't
154                // have a dir ..
155                ExportedDocument xdoc = new ExportedDocumentImpl();
156                xdoc.setPath(new Path("/"));
157                xdoc.setDocument(loadXML(entry));
158                return xdoc;
159            } else {
160                throw new IOException("Invalid Nuxeo archive on entry " + entry.getName());
161            }
162        }
163
164        // find the direct children entry that are part of the same document
165        // since archive is modifiable we can not rely on the Extra bits thing
166        List<String> childEntries = new ArrayList<String>();
167        int depth = new Path(idxname).removeTrailingSeparator().segmentCount();
168        for (String path : zipIndex) {
169            if (path.startsWith(idxname)) {
170                int subdepth = new Path(path).removeTrailingSeparator().segmentCount();
171                if (subdepth != depth + 1 || zipFile.getEntry(path).isDirectory()) {
172                    continue;
173                }
174                childEntries.add(path);
175            } else {
176                break;
177            }
178        }
179
180        if (childEntries.size() == 0) {
181            return read(); // empty dir -> try next directory
182        }
183        String name = entry.getName();
184        ExportedDocument xdoc = new ExportedDocumentImpl();
185        xdoc.setPath(new Path(name).removeTrailingSeparator());
186        for (String childEntryName : childEntries) {
187            int i = zipIndex.indexOf(childEntryName);
188            idxname = zipIndex.remove(i);
189            entry = zipFile.getEntry(idxname);
190            name = entry.getName();
191            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
192                xdoc.setDocument(loadXML(entry));
193            } else if (name.endsWith(".xml")) { // external doc file
194                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
195            } else { // should be a blob
196                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
197            }
198        }
199        return xdoc;
200    }
201
202    protected ExportedDocument readOrderedStream() throws IOException {
203        ZipEntry entry = in.getNextEntry();
204        if (entry == null) {
205            return null;
206        }
207        if (!entry.isDirectory()) {
208            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
209                return read();
210            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
211                // the repository ROOT! TODO: how to handle root? it doesn't
212                // have a dir ..
213                ExportedDocument xdoc = new ExportedDocumentImpl();
214                xdoc.setPath(new Path("/"));
215                xdoc.setDocument(loadXML(entry));
216                return xdoc;
217            } else {
218                throw new IOException("Invalid Nuxeo archive");
219            }
220        }
221        int count = getFilesCount(entry);
222        if (count == 0) {
223            return read(); // empty dir -> try next directory
224        }
225        String name = entry.getName();
226        ExportedDocument xdoc = new ExportedDocumentImpl();
227        xdoc.setPath(new Path(name).removeTrailingSeparator());
228        for (int i = 0; i < count; i++) {
229            entry = in.getNextEntry();
230            name = entry.getName();
231            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
232                xdoc.setDocument(loadXML(entry));
233            } else if (name.endsWith(".xml")) { // external doc file
234                xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry));
235            } else { // should be a blob
236                xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry));
237            }
238        }
239        return xdoc;
240    }
241
242    @Override
243    public void close() {
244        IOUtils.closeQuietly(zipFile);
245        if (inMustBeClosed) {
246            IOUtils.closeQuietly(in);
247        }
248        for (File file : filesToDelete) {
249            file.delete();
250        }
251    }
252
253    private static int getFilesCount(ZipEntry entry) throws IOException {
254        byte[] bytes = entry.getExtra();
255        if (bytes == null) {
256            return 0;
257        } else if (bytes.length != 4) {
258            throw new IOException("Invalid Nuxeo Archive");
259        } else {
260            return new DWord(bytes).getInt();
261        }
262    }
263
264    private Document loadXML(ZipEntry entry) throws IOException {
265        try {
266            SAXReader saxReader = new SAXReader();
267            if (zipFile != null) {
268                try (InputStream stream = zipFile.getInputStream(entry)) {
269                    return saxReader.read(stream);
270                }
271            } else {
272                // SAXReader.read always closes the stream, but we don't want that
273                // so wrap it in a CloseShieldInputStream
274                try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
275                    return saxReader.read(ncin);
276                }
277            }
278        } catch (DocumentException e) {
279            throw new IOException("Failed to read zip entry: " + entry.getName(), e);
280        }
281    }
282
283    private Blob createBlob(ZipEntry entry) throws IOException {
284        if (zipFile != null) {
285            return new ZipEntryBlob(zipFile, entry);
286        } else {
287            // should decompress since this is a generic stream
288            // FileBlob always closes the stream, but we don't want that
289            // so wrap it in a CloseShieldInputStream
290            try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
291                Blob blob = Blobs.createBlob(ncin);
292                filesToDelete.add(blob.getFile());
293                return blob;
294            }
295        }
296    }
297
298    private void checkMarker() throws IOException {
299
300        if (zipFile == null) {
301            ZipEntry entry = in.getNextEntry();
302            if (entry == null) {
303                throw new IOException("Not a valid Nuxeo Archive - no marker file found (unexpected end of zip)");
304            }
305            if (!isMarkerEntry(entry)) {
306                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
307            }
308        } else {
309            if (!zipIndex.contains(ExportConstants.MARKER_FILE)) {
310                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
311            }
312        }
313    }
314
315    public static boolean isMarkerEntry(ZipEntry entry) {
316        return entry.getName().equals(ExportConstants.MARKER_FILE);
317    }
318
319}