001/* 002 * Copyright (c) 2006-2015 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the Eclipse Public License v1.0 006 * which accompanies this distribution, and is available at 007 * http://www.eclipse.org/legal/epl-v10.html 008 * 009 * Contributors: 010 * Bogdan Stefanescu 011 * Thierry Delprat 012 * Florent Guillaume 013 */ 014package org.nuxeo.ecm.core.io.impl.plugins; 015 016import java.io.File; 017import java.io.IOException; 018import java.io.InputStream; 019import java.util.ArrayList; 020import java.util.Collection; 021import java.util.Collections; 022import java.util.Comparator; 023import java.util.Enumeration; 024import java.util.List; 025import java.util.zip.ZipEntry; 026import java.util.zip.ZipFile; 027import java.util.zip.ZipInputStream; 028 029import org.apache.commons.io.FilenameUtils; 030import org.apache.commons.io.IOUtils; 031import org.apache.commons.io.input.CloseShieldInputStream; 032import org.dom4j.Document; 033import org.dom4j.DocumentException; 034import org.dom4j.io.SAXReader; 035import org.nuxeo.common.utils.Path; 036import org.nuxeo.ecm.core.api.Blob; 037import org.nuxeo.ecm.core.api.Blobs; 038import org.nuxeo.ecm.core.api.impl.blob.ZipEntryBlob; 039import org.nuxeo.ecm.core.io.ExportConstants; 040import org.nuxeo.ecm.core.io.ExportedDocument; 041import org.nuxeo.ecm.core.io.impl.AbstractDocumentReader; 042import org.nuxeo.ecm.core.io.impl.DWord; 043import org.nuxeo.ecm.core.io.impl.ExportedDocumentImpl; 044 045/** 046 * Reads nuxeo archives generated using {@link NuxeoArchiveWriter}. 047 * <p> 048 * If you need to read a CoreIO XML Archive that was not directly generated by {@link NuxeoArchiveWriter} or that was 049 * modified you need to use the NuxeoArchiveReader(File) constructor. 050 * <p> 051 * This implementation holds either a {@link ZipInputStream}, in which case it is assumed to have been generated by 052 * {@link NuxeoArchiveWriter} and has a special format (TODO DOCUMENT), or it holds a {@link ZipFile} that may be used 053 * in a random-access manner. 054 */ 055public class NuxeoArchiveReader extends AbstractDocumentReader { 056 057 private ZipInputStream in; 058 059 private boolean inMustBeClosed; 060 061 private ZipFile zipFile; 062 063 private List<String> zipIndex; 064 065 private final Collection<File> filesToDelete = new ArrayList<File>(); 066 067 /** 068 * Create a {@link NuxeoArchiveReader} from an {@link InputStream}. 069 * <p> 070 * The InputStream must point to an archive that was generated by {@link NuxeoArchiveWriter}. 071 * 072 * @param in InputStream pointing an archive that was generated by NuxeoArchiveWriter 073 * @throws IOException 074 */ 075 public NuxeoArchiveReader(InputStream in) throws IOException { 076 this(new ZipInputStream(in), true); 077 } 078 079 /** 080 * Protected constructor used by {@link ZipReader}. Must not close the stream when done. 081 */ 082 protected NuxeoArchiveReader(ZipInputStream in, boolean checkMarker) throws IOException { 083 this.in = in; 084 inMustBeClosed = !checkMarker; 085 if (checkMarker) { 086 checkMarker(); 087 } 088 } 089 090 /** 091 * Create a {@link NuxeoArchiveReader} from a {@link File}. 092 * <p> 093 * This constructor is different from others because it allows the input zip file to have been generated by an other 094 * engine that {@link NuxeoArchiveWriter}. 095 * <p> 096 * In particular, you can use this constructor on a Zip Archive that was manually modified. 097 * 098 * @param file a Zip archive 099 */ 100 public NuxeoArchiveReader(File file) throws IOException { 101 this.zipFile = new ZipFile(file); 102 buildOrderedZipIndex(); 103 checkMarker(); 104 } 105 106 protected void buildOrderedZipIndex() { 107 zipIndex = new ArrayList<String>(); 108 Enumeration<? extends ZipEntry> entries = zipFile.entries(); 109 110 while (entries.hasMoreElements()) { 111 ZipEntry entry = entries.nextElement(); 112 zipIndex.add(entry.getName()); 113 } 114 Collections.sort(zipIndex, new Comparator<String>() { 115 @Override 116 public int compare(String spath1, String spath2) { 117 return spath1.compareTo(spath2); 118 } 119 }); 120 } 121 122 @Override 123 public ExportedDocument read() throws IOException { 124 if (zipFile != null) { 125 return readZip(); 126 } else { 127 return readOrderedStream(); 128 } 129 } 130 131 protected ExportedDocument readZip() throws IOException { 132 133 if (zipIndex.size() == 0) { 134 return null; 135 } 136 String idxname = zipIndex.remove(0); 137 ZipEntry entry = zipFile.getEntry(idxname); 138 if (entry == null) { 139 return null; 140 } 141 142 if (!entry.isDirectory()) { 143 if (entry.getName().equals(ExportConstants.MARKER_FILE)) { 144 return read(); 145 } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) { 146 // the repository ROOT! TODO: how to handle root? it doesn't 147 // have a dir .. 148 ExportedDocument xdoc = new ExportedDocumentImpl(); 149 xdoc.setPath(new Path("/")); 150 xdoc.setDocument(loadXML(entry)); 151 return xdoc; 152 } else { 153 throw new IOException("Invalid Nuxeo archive on entry " + entry.getName()); 154 } 155 } 156 157 // find the direct children entry that are part of the same document 158 // since archive is modifiable we can not rely on the Extra bits thing 159 List<String> childEntries = new ArrayList<String>(); 160 int depth = new Path(idxname).removeTrailingSeparator().segmentCount(); 161 for (String path : zipIndex) { 162 if (path.startsWith(idxname)) { 163 int subdepth = new Path(path).removeTrailingSeparator().segmentCount(); 164 if (subdepth != depth + 1 || zipFile.getEntry(path).isDirectory()) { 165 continue; 166 } 167 childEntries.add(path); 168 } else { 169 break; 170 } 171 } 172 173 if (childEntries.size() == 0) { 174 return read(); // empty dir -> try next directory 175 } 176 String name = entry.getName(); 177 ExportedDocument xdoc = new ExportedDocumentImpl(); 178 xdoc.setPath(new Path(name).removeTrailingSeparator()); 179 for (String childEntryName : childEntries) { 180 int i = zipIndex.indexOf(childEntryName); 181 idxname = zipIndex.remove(i); 182 entry = zipFile.getEntry(idxname); 183 name = entry.getName(); 184 if (name.endsWith(ExportConstants.DOCUMENT_FILE)) { 185 xdoc.setDocument(loadXML(entry)); 186 } else if (name.endsWith(".xml")) { // external doc file 187 xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry)); 188 } else { // should be a blob 189 xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry)); 190 } 191 } 192 return xdoc; 193 } 194 195 protected ExportedDocument readOrderedStream() throws IOException { 196 ZipEntry entry = in.getNextEntry(); 197 if (entry == null) { 198 return null; 199 } 200 if (!entry.isDirectory()) { 201 if (entry.getName().equals(ExportConstants.MARKER_FILE)) { 202 return read(); 203 } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) { 204 // the repository ROOT! TODO: how to handle root? it doesn't 205 // have a dir .. 206 ExportedDocument xdoc = new ExportedDocumentImpl(); 207 xdoc.setPath(new Path("/")); 208 xdoc.setDocument(loadXML(entry)); 209 return xdoc; 210 } else { 211 throw new IOException("Invalid Nuxeo archive"); 212 } 213 } 214 int count = getFilesCount(entry); 215 if (count == 0) { 216 return read(); // empty dir -> try next directory 217 } 218 String name = entry.getName(); 219 ExportedDocument xdoc = new ExportedDocumentImpl(); 220 xdoc.setPath(new Path(name).removeTrailingSeparator()); 221 for (int i = 0; i < count; i++) { 222 entry = in.getNextEntry(); 223 name = entry.getName(); 224 if (name.endsWith(ExportConstants.DOCUMENT_FILE)) { 225 xdoc.setDocument(loadXML(entry)); 226 } else if (name.endsWith(".xml")) { // external doc file 227 xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry)); 228 } else { // should be a blob 229 xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry)); 230 } 231 } 232 return xdoc; 233 } 234 235 @Override 236 public void close() { 237 IOUtils.closeQuietly(zipFile); 238 if (inMustBeClosed) { 239 IOUtils.closeQuietly(in); 240 } 241 for (File file : filesToDelete) { 242 file.delete(); 243 } 244 } 245 246 private static int getFilesCount(ZipEntry entry) throws IOException { 247 byte[] bytes = entry.getExtra(); 248 if (bytes == null) { 249 return 0; 250 } else if (bytes.length != 4) { 251 throw new IOException("Invalid Nuxeo Archive"); 252 } else { 253 return new DWord(bytes).getInt(); 254 } 255 } 256 257 private Document loadXML(ZipEntry entry) throws IOException { 258 try { 259 SAXReader saxReader = new SAXReader(); 260 if (zipFile != null) { 261 try (InputStream stream = zipFile.getInputStream(entry)) { 262 return saxReader.read(stream); 263 } 264 } else { 265 // SAXReader.read always closes the stream, but we don't want that 266 // so wrap it in a CloseShieldInputStream 267 try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) { 268 return saxReader.read(ncin); 269 } 270 } 271 } catch (DocumentException e) { 272 throw new IOException("Failed to read zip entry: " + entry.getName(), e); 273 } 274 } 275 276 private Blob createBlob(ZipEntry entry) throws IOException { 277 if (zipFile != null) { 278 return new ZipEntryBlob(zipFile, entry); 279 } else { 280 // should decompress since this is a generic stream 281 // FileBlob always closes the stream, but we don't want that 282 // so wrap it in a CloseShieldInputStream 283 try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) { 284 Blob blob = Blobs.createBlob(ncin); 285 filesToDelete.add(blob.getFile()); 286 return blob; 287 } 288 } 289 } 290 291 private void checkMarker() throws IOException { 292 293 if (zipFile == null) { 294 ZipEntry entry = in.getNextEntry(); 295 if (entry == null) { 296 throw new IOException("Not a valid Nuxeo Archive - no marker file found (unexpected end of zip)"); 297 } 298 if (!isMarkerEntry(entry)) { 299 throw new IOException("Not a valid Nuxeo Archive - no marker file found"); 300 } 301 } else { 302 if (!zipIndex.contains(ExportConstants.MARKER_FILE)) { 303 throw new IOException("Not a valid Nuxeo Archive - no marker file found"); 304 } 305 } 306 } 307 308 public static boolean isMarkerEntry(ZipEntry entry) { 309 return entry.getName().equals(ExportConstants.MARKER_FILE); 310 } 311 312}