/*
 * (C) Copyright 2006-2015 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Bogdan Stefanescu
 *     Thierry Delprat
 *     Florent Guillaume
 */
package org.nuxeo.ecm.core.io.impl.plugins;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.nuxeo.common.utils.Path;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.impl.blob.ZipEntryBlob;
import org.nuxeo.ecm.core.io.ExportConstants;
import org.nuxeo.ecm.core.io.ExportedDocument;
import org.nuxeo.ecm.core.io.impl.AbstractDocumentReader;
import org.nuxeo.ecm.core.io.impl.DWord;
import org.nuxeo.ecm.core.io.impl.ExportedDocumentImpl;

/**
 * Reads nuxeo archives generated using {@link NuxeoArchiveWriter}.
 * <p>
 * If you need to read a CoreIO XML Archive that was not directly generated by {@link NuxeoArchiveWriter} or that was
 * modified you need to use the {@link #NuxeoArchiveReader(File)} constructor.
 * <p>
 * This implementation holds either a {@link ZipInputStream}, in which case it is assumed to have been generated by
 * {@link NuxeoArchiveWriter} and has a special sequential format, or it holds a {@link ZipFile} that may be used in a
 * random-access manner.
 * <p>
 * Sequential format, as consumed by this reader: the first entry is the marker file
 * ({@link ExportConstants#MARKER_FILE}); each document is a directory entry whose "extra" field is exactly 4 bytes
 * (decoded through {@link DWord}) giving the number of file entries that immediately follow and belong to that
 * document. A directory entry without extra field is treated as an empty directory and skipped.
 */
public class NuxeoArchiveReader extends AbstractDocumentReader {

    /** Sequential source; {@code null} when reading from {@link #zipFile}. */
    private ZipInputStream in;

    /** Whether {@link #close()} is responsible for closing {@link #in}. */
    private boolean inMustBeClosed;

    /** Random-access source; {@code null} when reading from {@link #in}. */
    private ZipFile zipFile;

    /** Sorted names of the entries of {@link #zipFile} not consumed yet (random-access mode only). */
    private List<String> zipIndex;

    /** Temporary files backing blobs extracted from the stream; deleted by {@link #close()}. */
    private final Collection<File> filesToDelete = new ArrayList<>();

    /**
     * Create a {@link NuxeoArchiveReader} from an {@link InputStream}.
     * <p>
     * The InputStream must point to an archive that was generated by {@link NuxeoArchiveWriter}.
     * <p>
     * The reader wraps the stream in its own {@link ZipInputStream} and closes it (and therefore {@code in}) on
     * {@link #close()}. If this constructor throws, the caller remains responsible for closing {@code in}.
     *
     * @param in InputStream pointing to an archive that was generated by NuxeoArchiveWriter
     * @throws IOException if the stream cannot be read or does not start with the marker file
     */
    public NuxeoArchiveReader(InputStream in) throws IOException {
        this(new ZipInputStream(in), true);
        // The ZipInputStream was created here, so this reader owns it and must close it.
        inMustBeClosed = true;
    }

    /**
     * Protected constructor used by {@link ZipReader}. Must not close the stream when done.
     *
     * @param in the zip stream to read from; it stays owned by the caller
     * @param checkMarker whether the first entry must be consumed and verified to be the marker file
     * @throws IOException if {@code checkMarker} is set and the marker file is missing
     */
    protected NuxeoArchiveReader(ZipInputStream in, boolean checkMarker) throws IOException {
        this.in = in;
        // The stream is supplied (and owned) by the caller whatever the value of checkMarker.
        inMustBeClosed = false;
        if (checkMarker) {
            checkMarker();
        }
    }

    /**
     * Create a {@link NuxeoArchiveReader} from a {@link File}.
     * <p>
     * This constructor is different from others because it allows the input zip file to have been generated by
     * another engine than {@link NuxeoArchiveWriter}.
     * <p>
     * In particular, you can use this constructor on a Zip Archive that was manually modified.
     *
     * @param file a Zip archive
     * @throws IOException if the file cannot be opened as a zip or does not contain the marker file
     */
    public NuxeoArchiveReader(File file) throws IOException {
        this.zipFile = new ZipFile(file);
        try {
            buildOrderedZipIndex();
            checkMarker();
        } catch (IOException | RuntimeException e) {
            // No instance is handed to the caller on failure, so nobody else could close the file.
            IOUtils.closeQuietly(zipFile);
            throw e;
        }
    }

    /**
     * Fills {@link #zipIndex} with the names of all the entries of {@link #zipFile}, sorted in natural order so that a
     * directory name is followed by the names of the entries it contains.
     */
    protected void buildOrderedZipIndex() {
        zipIndex = new ArrayList<>();
        Enumeration<? extends ZipEntry> entries = zipFile.entries();

        while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            zipIndex.add(entry.getName());
        }
        zipIndex.sort(Comparator.naturalOrder());
    }

    /**
     * Reads the next document of the archive.
     *
     * @return the next document, or {@code null} when the archive is exhausted
     * @throws IOException if the archive is malformed or cannot be read
     */
    @Override
    public ExportedDocument read() throws IOException {
        if (zipFile != null) {
            return readZip();
        } else {
            return readOrderedStream();
        }
    }

    /**
     * Reads the next document in random-access mode, consuming the matching names from {@link #zipIndex}.
     *
     * @return the next document, or {@code null} when no entry is left
     * @throws IOException if a top-level file other than the marker or the root document is found, or on read error
     */
    protected ExportedDocument readZip() throws IOException {

        if (zipIndex.isEmpty()) {
            return null;
        }
        String idxname = zipIndex.remove(0);
        ZipEntry entry = zipFile.getEntry(idxname);
        if (entry == null) {
            return null;
        }

        if (!entry.isDirectory()) {
            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
                return read();
            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
                // the repository ROOT! TODO: how to handle root? it doesn't
                // have a dir ..
                ExportedDocument xdoc = new ExportedDocumentImpl();
                xdoc.setPath(new Path("/"));
                xdoc.setDocument(loadXML(entry));
                return xdoc;
            } else {
                throw new IOException("Invalid Nuxeo archive on entry " + entry.getName());
            }
        }

        // find the direct children entry that are part of the same document
        // since archive is modifiable we can not rely on the Extra bits thing
        List<String> childEntries = new ArrayList<>();
        int depth = new Path(idxname).removeTrailingSeparator().segmentCount();
        for (String path : zipIndex) {
            if (path.startsWith(idxname)) {
                int subdepth = new Path(path).removeTrailingSeparator().segmentCount();
                if (subdepth != depth + 1 || zipFile.getEntry(path).isDirectory()) {
                    continue;
                }
                childEntries.add(path);
            } else {
                // the index is sorted: the first name outside of this directory ends its content
                break;
            }
        }

        if (childEntries.isEmpty()) {
            return read(); // empty dir -> try next directory
        }
        ExportedDocument xdoc = new ExportedDocumentImpl();
        xdoc.setPath(new Path(entry.getName()).removeTrailingSeparator());
        for (String childEntryName : childEntries) {
            // consume the child so that it is not visited again as a top-level entry
            zipIndex.remove(childEntryName);
            ZipEntry child = zipFile.getEntry(childEntryName);
            String name = child.getName();
            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
                xdoc.setDocument(loadXML(child));
            } else if (name.endsWith(".xml")) { // external doc file
                xdoc.putDocument(FilenameUtils.getBaseName(name), loadXML(child));
            } else { // should be a blob
                xdoc.putBlob(FilenameUtils.getName(name), createBlob(child));
            }
        }
        return xdoc;
    }

    /**
     * Reads the next document in sequential mode, relying on the file count stored in the extra field of each
     * directory entry.
     *
     * @return the next document, or {@code null} when the stream has no more entries
     * @throws IOException if the archive is malformed, ends before all announced files were read, or on read error
     */
    protected ExportedDocument readOrderedStream() throws IOException {
        ZipEntry entry = in.getNextEntry();
        if (entry == null) {
            return null;
        }
        if (!entry.isDirectory()) {
            if (entry.getName().equals(ExportConstants.MARKER_FILE)) {
                return read();
            } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) {
                // the repository ROOT! TODO: how to handle root? it doesn't
                // have a dir ..
                ExportedDocument xdoc = new ExportedDocumentImpl();
                xdoc.setPath(new Path("/"));
                xdoc.setDocument(loadXML(entry));
                return xdoc;
            } else {
                throw new IOException("Invalid Nuxeo archive");
            }
        }
        int count = getFilesCount(entry);
        if (count == 0) {
            return read(); // empty dir -> try next directory
        }
        String dirName = entry.getName();
        ExportedDocument xdoc = new ExportedDocumentImpl();
        xdoc.setPath(new Path(dirName).removeTrailingSeparator());
        for (int i = 0; i < count; i++) {
            ZipEntry child = in.getNextEntry();
            if (child == null) {
                // truncated archive: fewer entries than announced by the directory entry
                throw new IOException("Invalid Nuxeo archive: unexpected end of zip after " + i + " of " + count
                        + " entries of " + dirName);
            }
            String name = child.getName();
            if (name.endsWith(ExportConstants.DOCUMENT_FILE)) {
                xdoc.setDocument(loadXML(child));
            } else if (name.endsWith(".xml")) { // external doc file
                xdoc.putDocument(FilenameUtils.getBaseName(name), loadXML(child));
            } else { // should be a blob
                xdoc.putBlob(FilenameUtils.getName(name), createBlob(child));
            }
        }
        return xdoc;
    }

    /**
     * Closes the zip file (if any), closes the zip stream when this reader owns it, and deletes the temporary files
     * created for the blobs read from the stream.
     */
    @Override
    public void close() {
        IOUtils.closeQuietly(zipFile);
        if (inMustBeClosed) {
            IOUtils.closeQuietly(in);
        }
        for (File file : filesToDelete) {
            // best effort: a failed deletion is ignored
            file.delete();
        }
    }

    /**
     * Decodes the number of files announced by the extra field of a directory entry.
     *
     * @return 0 when the entry has no extra field, the decoded 4-byte value otherwise
     * @throws IOException if the extra field is present but is not exactly 4 bytes long
     */
    private static int getFilesCount(ZipEntry entry) throws IOException {
        byte[] bytes = entry.getExtra();
        if (bytes == null) {
            return 0;
        } else if (bytes.length != 4) {
            throw new IOException("Invalid Nuxeo Archive");
        } else {
            return new DWord(bytes).getInt();
        }
    }

    /**
     * Parses the content of the given entry as an XML document.
     * <p>
     * In sequential mode the content is read from the current position of the zip stream, which must therefore be
     * positioned on {@code entry}.
     *
     * @throws IOException on read error, or wrapping the {@link DocumentException} when the XML cannot be parsed
     */
    private Document loadXML(ZipEntry entry) throws IOException {
        try {
            // NOTE(review): SAXReader is used with its default configuration; if archives can come from untrusted
            // sources, consider disabling DTDs / external entities (XXE) - confirm that no archive relies on them.
            SAXReader saxReader = new SAXReader();
            if (zipFile != null) {
                try (InputStream stream = zipFile.getInputStream(entry)) {
                    return saxReader.read(stream);
                }
            } else {
                // SAXReader.read always closes the stream, but we don't want that
                // so wrap it in a CloseShieldInputStream
                try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
                    return saxReader.read(ncin);
                }
            }
        } catch (DocumentException e) {
            throw new IOException("Failed to read zip entry: " + entry.getName(), e);
        }
    }

    /**
     * Creates a blob for the content of the given entry.
     * <p>
     * In random-access mode the blob references the entry of the zip file. In sequential mode the content is copied
     * out of the stream into a blob whose file is registered for deletion on {@link #close()}.
     */
    private Blob createBlob(ZipEntry entry) throws IOException {
        if (zipFile != null) {
            return new ZipEntryBlob(zipFile, entry);
        } else {
            // should decompress since this is a generic stream
            // FileBlob always closes the stream, but we don't want that
            // so wrap it in a CloseShieldInputStream
            try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) {
                Blob blob = Blobs.createBlob(ncin);
                filesToDelete.add(blob.getFile());
                return blob;
            }
        }
    }

    /**
     * Verifies that the archive contains the marker file.
     * <p>
     * In sequential mode this consumes the first entry of the stream, which must be the marker. In random-access mode
     * the marker only has to be present in the index.
     *
     * @throws IOException if the marker file is not found
     */
    private void checkMarker() throws IOException {

        if (zipFile == null) {
            ZipEntry entry = in.getNextEntry();
            if (entry == null) {
                throw new IOException("Not a valid Nuxeo Archive - no marker file found (unexpected end of zip)");
            }
            if (!isMarkerEntry(entry)) {
                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
            }
        } else {
            if (!zipIndex.contains(ExportConstants.MARKER_FILE)) {
                throw new IOException("Not a valid Nuxeo Archive - no marker file found");
            }
        }
    }

    /**
     * Tells whether the given entry is the marker file of a Nuxeo archive.
     *
     * @param entry the zip entry to test
     * @return {@code true} if the entry name equals {@link ExportConstants#MARKER_FILE}
     */
    public static boolean isMarkerEntry(ZipEntry entry) {
        return entry.getName().equals(ExportConstants.MARKER_FILE);
    }

}