/*
 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */

package org.nuxeo.ecm.platform.filemanager.service.extension;

import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.common.utils.IdUtils;
import org.nuxeo.common.utils.Path;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.CloseableFile;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.PathRef;
import org.nuxeo.ecm.core.schema.DocumentType;
import org.nuxeo.ecm.core.schema.TypeConstants;
import org.nuxeo.ecm.core.schema.types.Field;
054import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl; 055import org.nuxeo.ecm.core.schema.types.Type; 056import org.nuxeo.ecm.core.schema.types.primitives.DateType; 057import org.nuxeo.ecm.core.schema.types.primitives.IntegerType; 058import org.nuxeo.ecm.core.schema.types.primitives.LongType; 059import org.nuxeo.ecm.core.schema.types.primitives.StringType; 060import org.nuxeo.ecm.platform.types.TypeManager; 061 062public class CSVZipImporter extends AbstractFileImporter { 063 064 private static final long serialVersionUID = 1L; 065 066 private static final String MARKER = "meta-data.csv"; 067 068 private static final Log log = LogFactory.getLog(CSVZipImporter.class); 069 070 public static ZipFile getArchiveFileIfValid(File file) throws IOException { 071 ZipFile zip; 072 073 try { 074 zip = new ZipFile(file); 075 } catch (ZipException e) { 076 log.debug("file is not a zipfile ! ", e); 077 return null; 078 } catch (IOException e) { 079 log.debug("can not open zipfile ! ", e); 080 return null; 081 } 082 083 ZipEntry marker = zip.getEntry(MARKER); 084 085 if (marker == null) { 086 zip.close(); 087 return null; 088 } else { 089 return zip; 090 } 091 } 092 093 @Override 094 public DocumentModel create(CoreSession documentManager, Blob content, String path, boolean overwrite, 095 String filename, TypeManager typeService) throws IOException { 096 ZipFile zip = null; 097 try (CloseableFile source = content.getCloseableFile()) { 098 zip = getArchiveFileIfValid(source.getFile()); 099 if (zip == null) { 100 return null; 101 } 102 103 DocumentModel container = documentManager.getDocument(new PathRef(path)); 104 105 ZipEntry index = zip.getEntry(MARKER); 106 try (Reader reader = new InputStreamReader(zip.getInputStream(index)); 107 CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT.withHeader());) { 108 109 Map<String, Integer> header = csvParser.getHeaderMap(); 110 for (CSVRecord csvRecord : csvParser) { 111 String type = null; 112 String id = null; 113 
Map<String, String> stringValues = new HashMap<>(); 114 for (String headerValue : header.keySet()) { 115 String lineValue = csvRecord.get(headerValue); 116 if ("type".equalsIgnoreCase(headerValue)) { 117 type = lineValue; 118 } else if ("id".equalsIgnoreCase(headerValue)) { 119 id = lineValue; 120 } else { 121 stringValues.put(headerValue, lineValue); 122 } 123 } 124 125 boolean updateDoc = false; 126 // get doc for update 127 DocumentModel targetDoc = null; 128 if (id != null) { 129 // update ? 130 String targetPath = new Path(path).append(id).toString(); 131 if (documentManager.exists(new PathRef(targetPath))) { 132 targetDoc = documentManager.getDocument(new PathRef(targetPath)); 133 updateDoc = true; 134 } 135 } 136 137 // create doc if needed 138 if (targetDoc == null) { 139 if (type == null) { 140 log.error("Can not create doc without a type, skipping line"); 141 continue; 142 } 143 144 if (id == null) { 145 id = IdUtils.generateStringId(); 146 } 147 targetDoc = documentManager.createDocumentModel(path, id, type); 148 } 149 150 // update doc properties 151 DocumentType targetDocType = targetDoc.getDocumentType(); 152 for (String fname : stringValues.keySet()) { 153 154 String stringValue = stringValues.get(fname); 155 Field field = null; 156 boolean usePrefix = false; 157 String schemaName = null; 158 String fieldName = null; 159 160 if (fname.contains(":")) { 161 if (targetDocType.hasField(fname)) { 162 field = targetDocType.getField(fname); 163 usePrefix = true; 164 } 165 } else if (fname.contains(".")) { 166 String[] parts = fname.split("\\."); 167 schemaName = parts[0]; 168 fieldName = parts[1]; 169 if (targetDocType.hasSchema(schemaName)) { 170 field = targetDocType.getField(fieldName); 171 usePrefix = false; 172 } 173 } else { 174 if (targetDocType.hasField(fname)) { 175 field = targetDocType.getField(fname); 176 usePrefix = false; 177 schemaName = field.getDeclaringType().getSchemaName(); 178 } 179 } 180 181 if (field != null) { 182 Serializable 
fieldValue = getFieldValue(field, stringValue, zip); 183 184 if (fieldValue != null) { 185 if (usePrefix) { 186 targetDoc.setPropertyValue(fname, fieldValue); 187 } else { 188 targetDoc.setProperty(schemaName, fieldName, fieldValue); 189 } 190 } 191 } 192 } 193 if (updateDoc) { 194 documentManager.saveDocument(targetDoc); 195 } else { 196 documentManager.createDocument(targetDoc); 197 } 198 } 199 } 200 return container; 201 } finally { 202 IOUtils.closeQuietly(zip); 203 } 204 } 205 206 protected Serializable getFieldValue(Field field, String stringValue, ZipFile zip) { 207 Serializable fieldValue = null; 208 Type type = field.getType(); 209 if (type.isSimpleType()) { 210 if (type instanceof SimpleTypeImpl) { 211 // consider super type instead 212 type = type.getSuperType(); 213 } 214 if (type instanceof StringType) { 215 fieldValue = stringValue; 216 } else if (type instanceof IntegerType) { 217 fieldValue = Integer.parseInt(stringValue); 218 } else if (type instanceof LongType) { 219 fieldValue = Long.parseLong(stringValue); 220 } else if (type instanceof DateType) { 221 try { 222 Date date; 223 if (stringValue.length() == 10) { 224 date = new SimpleDateFormat("dd/MM/yyyy").parse(stringValue); 225 } else if (stringValue.length() == 8) { 226 date = new SimpleDateFormat("dd/MM/yy").parse(stringValue); 227 } else { 228 log.warn("Unknown date format :" + stringValue); 229 return null; 230 } 231 fieldValue = date; 232 } catch (ParseException e) { 233 log.error("Error during date parsing", e); 234 } 235 } else { 236 log.warn(String.format("Unsupported field type '%s'", type)); 237 return null; 238 } 239 } else if (type.isComplexType()) { 240 if (TypeConstants.CONTENT.equals(field.getName().getLocalName())) { 241 ZipEntry blobIndex = zip.getEntry(stringValue); 242 if (blobIndex != null) { 243 Blob blob; 244 try { 245 blob = Blobs.createBlob(zip.getInputStream(blobIndex)); 246 } catch (IOException e) { 247 throw new RuntimeException(e); 248 } 249 
blob.setFilename(stringValue); 250 fieldValue = (Serializable) blob; 251 } 252 } 253 } 254 255 return fieldValue; 256 } 257 258}