001/* 002 * (C) Copyright 2006-2015 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Bogdan Stefanescu 018 * Thierry Delprat 019 * Florent Guillaume 020 */ 021package org.nuxeo.ecm.core.io.impl.plugins; 022 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.util.ArrayList; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.Comparator; 030import java.util.Enumeration; 031import java.util.List; 032import java.util.zip.ZipEntry; 033import java.util.zip.ZipFile; 034import java.util.zip.ZipInputStream; 035 036import org.apache.commons.io.FilenameUtils; 037import org.apache.commons.io.IOUtils; 038import org.apache.commons.io.input.CloseShieldInputStream; 039import org.dom4j.Document; 040import org.dom4j.DocumentException; 041import org.dom4j.io.SAXReader; 042import org.nuxeo.common.utils.Path; 043import org.nuxeo.ecm.core.api.Blob; 044import org.nuxeo.ecm.core.api.Blobs; 045import org.nuxeo.ecm.core.api.impl.blob.ZipEntryBlob; 046import org.nuxeo.ecm.core.io.ExportConstants; 047import org.nuxeo.ecm.core.io.ExportedDocument; 048import org.nuxeo.ecm.core.io.impl.AbstractDocumentReader; 049import org.nuxeo.ecm.core.io.impl.DWord; 050import org.nuxeo.ecm.core.io.impl.ExportedDocumentImpl; 051 052/** 053 * Reads nuxeo archives generated using {@link NuxeoArchiveWriter}. 054 * <p> 055 * If you need to read a CoreIO XML Archive that was not directly generated by {@link NuxeoArchiveWriter} or that was 056 * modified you need to use the NuxeoArchiveReader(File) constructor. 057 * <p> 058 * This implementation holds either a {@link ZipInputStream}, in which case it is assumed to have been generated by 059 * {@link NuxeoArchiveWriter} and has a special format (TODO DOCUMENT), or it holds a {@link ZipFile} that may be used 060 * in a random-access manner. 061 */ 062public class NuxeoArchiveReader extends AbstractDocumentReader { 063 064 private ZipInputStream in; 065 066 private boolean inMustBeClosed; 067 068 private ZipFile zipFile; 069 070 private List<String> zipIndex; 071 072 private final Collection<File> filesToDelete = new ArrayList<File>(); 073 074 /** 075 * Create a {@link NuxeoArchiveReader} from an {@link InputStream}. 076 * <p> 077 * The InputStream must point to an archive that was generated by {@link NuxeoArchiveWriter}. 078 * 079 * @param in InputStream pointing an archive that was generated by NuxeoArchiveWriter 080 * @throws IOException 081 */ 082 public NuxeoArchiveReader(InputStream in) throws IOException { 083 this(new ZipInputStream(in), true); 084 } 085 086 /** 087 * Protected constructor used by {@link ZipReader}. Must not close the stream when done. 088 */ 089 protected NuxeoArchiveReader(ZipInputStream in, boolean checkMarker) throws IOException { 090 this.in = in; 091 inMustBeClosed = !checkMarker; 092 if (checkMarker) { 093 checkMarker(); 094 } 095 } 096 097 /** 098 * Create a {@link NuxeoArchiveReader} from a {@link File}. 099 * <p> 100 * This constructor is different from others because it allows the input zip file to have been generated by an other 101 * engine that {@link NuxeoArchiveWriter}. 102 * <p> 103 * In particular, you can use this constructor on a Zip Archive that was manually modified. 104 * 105 * @param file a Zip archive 106 */ 107 public NuxeoArchiveReader(File file) throws IOException { 108 this.zipFile = new ZipFile(file); 109 buildOrderedZipIndex(); 110 checkMarker(); 111 } 112 113 protected void buildOrderedZipIndex() { 114 zipIndex = new ArrayList<String>(); 115 Enumeration<? extends ZipEntry> entries = zipFile.entries(); 116 117 while (entries.hasMoreElements()) { 118 ZipEntry entry = entries.nextElement(); 119 zipIndex.add(entry.getName()); 120 } 121 Collections.sort(zipIndex, new Comparator<String>() { 122 @Override 123 public int compare(String spath1, String spath2) { 124 return spath1.compareTo(spath2); 125 } 126 }); 127 } 128 129 @Override 130 public ExportedDocument read() throws IOException { 131 if (zipFile != null) { 132 return readZip(); 133 } else { 134 return readOrderedStream(); 135 } 136 } 137 138 protected ExportedDocument readZip() throws IOException { 139 140 if (zipIndex.size() == 0) { 141 return null; 142 } 143 String idxname = zipIndex.remove(0); 144 ZipEntry entry = zipFile.getEntry(idxname); 145 if (entry == null) { 146 return null; 147 } 148 149 if (!entry.isDirectory()) { 150 if (entry.getName().equals(ExportConstants.MARKER_FILE)) { 151 return read(); 152 } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) { 153 // the repository ROOT! TODO: how to handle root? it doesn't 154 // have a dir .. 155 ExportedDocument xdoc = new ExportedDocumentImpl(); 156 xdoc.setPath(new Path("/")); 157 xdoc.setDocument(loadXML(entry)); 158 return xdoc; 159 } else { 160 throw new IOException("Invalid Nuxeo archive on entry " + entry.getName()); 161 } 162 } 163 164 // find the direct children entry that are part of the same document 165 // since archive is modifiable we can not rely on the Extra bits thing 166 List<String> childEntries = new ArrayList<String>(); 167 int depth = new Path(idxname).removeTrailingSeparator().segmentCount(); 168 for (String path : zipIndex) { 169 if (path.startsWith(idxname)) { 170 int subdepth = new Path(path).removeTrailingSeparator().segmentCount(); 171 if (subdepth != depth + 1 || zipFile.getEntry(path).isDirectory()) { 172 continue; 173 } 174 childEntries.add(path); 175 } else { 176 break; 177 } 178 } 179 180 if (childEntries.size() == 0) { 181 return read(); // empty dir -> try next directory 182 } 183 String name = entry.getName(); 184 ExportedDocument xdoc = new ExportedDocumentImpl(); 185 xdoc.setPath(new Path(name).removeTrailingSeparator()); 186 for (String childEntryName : childEntries) { 187 int i = zipIndex.indexOf(childEntryName); 188 idxname = zipIndex.remove(i); 189 entry = zipFile.getEntry(idxname); 190 name = entry.getName(); 191 if (name.endsWith(ExportConstants.DOCUMENT_FILE)) { 192 xdoc.setDocument(loadXML(entry)); 193 } else if (name.endsWith(".xml")) { // external doc file 194 xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry)); 195 } else { // should be a blob 196 xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry)); 197 } 198 } 199 return xdoc; 200 } 201 202 protected ExportedDocument readOrderedStream() throws IOException { 203 ZipEntry entry = in.getNextEntry(); 204 if (entry == null) { 205 return null; 206 } 207 if (!entry.isDirectory()) { 208 if (entry.getName().equals(ExportConstants.MARKER_FILE)) { 209 return read(); 210 } else if (entry.getName().equals(ExportConstants.DOCUMENT_FILE)) { 211 // the repository ROOT! TODO: how to handle root? it doesn't 212 // have a dir .. 213 ExportedDocument xdoc = new ExportedDocumentImpl(); 214 xdoc.setPath(new Path("/")); 215 xdoc.setDocument(loadXML(entry)); 216 return xdoc; 217 } else { 218 throw new IOException("Invalid Nuxeo archive"); 219 } 220 } 221 int count = getFilesCount(entry); 222 if (count == 0) { 223 return read(); // empty dir -> try next directory 224 } 225 String name = entry.getName(); 226 ExportedDocument xdoc = new ExportedDocumentImpl(); 227 xdoc.setPath(new Path(name).removeTrailingSeparator()); 228 for (int i = 0; i < count; i++) { 229 entry = in.getNextEntry(); 230 name = entry.getName(); 231 if (name.endsWith(ExportConstants.DOCUMENT_FILE)) { 232 xdoc.setDocument(loadXML(entry)); 233 } else if (name.endsWith(".xml")) { // external doc file 234 xdoc.putDocument(FilenameUtils.getBaseName(entry.getName()), loadXML(entry)); 235 } else { // should be a blob 236 xdoc.putBlob(FilenameUtils.getName(entry.getName()), createBlob(entry)); 237 } 238 } 239 return xdoc; 240 } 241 242 @Override 243 public void close() { 244 IOUtils.closeQuietly(zipFile); 245 if (inMustBeClosed) { 246 IOUtils.closeQuietly(in); 247 } 248 for (File file : filesToDelete) { 249 file.delete(); 250 } 251 } 252 253 private static int getFilesCount(ZipEntry entry) throws IOException { 254 byte[] bytes = entry.getExtra(); 255 if (bytes == null) { 256 return 0; 257 } else if (bytes.length != 4) { 258 throw new IOException("Invalid Nuxeo Archive"); 259 } else { 260 return new DWord(bytes).getInt(); 261 } 262 } 263 264 private Document loadXML(ZipEntry entry) throws IOException { 265 try { 266 SAXReader saxReader = new SAXReader(); 267 if (zipFile != null) { 268 try (InputStream stream = zipFile.getInputStream(entry)) { 269 return saxReader.read(stream); 270 } 271 } else { 272 // SAXReader.read always closes the stream, but we don't want that 273 // so wrap it in a CloseShieldInputStream 274 try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) { 275 return saxReader.read(ncin); 276 } 277 } 278 } catch (DocumentException e) { 279 throw new IOException("Failed to read zip entry: " + entry.getName(), e); 280 } 281 } 282 283 private Blob createBlob(ZipEntry entry) throws IOException { 284 if (zipFile != null) { 285 return new ZipEntryBlob(zipFile, entry); 286 } else { 287 // should decompress since this is a generic stream 288 // FileBlob always closes the stream, but we don't want that 289 // so wrap it in a CloseShieldInputStream 290 try (CloseShieldInputStream ncin = new CloseShieldInputStream(in)) { 291 Blob blob = Blobs.createBlob(ncin); 292 filesToDelete.add(blob.getFile()); 293 return blob; 294 } 295 } 296 } 297 298 private void checkMarker() throws IOException { 299 300 if (zipFile == null) { 301 ZipEntry entry = in.getNextEntry(); 302 if (entry == null) { 303 throw new IOException("Not a valid Nuxeo Archive - no marker file found (unexpected end of zip)"); 304 } 305 if (!isMarkerEntry(entry)) { 306 throw new IOException("Not a valid Nuxeo Archive - no marker file found"); 307 } 308 } else { 309 if (!zipIndex.contains(ExportConstants.MARKER_FILE)) { 310 throw new IOException("Not a valid Nuxeo Archive - no marker file found"); 311 } 312 } 313 } 314 315 public static boolean isMarkerEntry(ZipEntry entry) { 316 return entry.getName().equals(ExportConstants.MARKER_FILE); 317 } 318 319}