001/* 002 * (C) Copyright 2002-2015 Nuxeo SA (http://nuxeo.com/) and others. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 * 016 * Contributors: 017 * Nuxeo - initial API and implementation 018 */ 019 020package org.nuxeo.ecm.platform.importer.xml.parser; 021 022import java.io.File; 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.Serializable; 026import java.util.ArrayList; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030import java.util.Stack; 031 032import org.apache.commons.io.FileUtils; 033import org.apache.commons.logging.Log; 034import org.apache.commons.logging.LogFactory; 035import org.dom4j.Attribute; 036import org.dom4j.Document; 037import org.dom4j.DocumentException; 038import org.dom4j.Element; 039import org.dom4j.InvalidXPathException; 040import org.dom4j.Node; 041import org.dom4j.Text; 042import org.dom4j.io.SAXReader; 043import org.dom4j.tree.DefaultText; 044import org.mvel2.MVEL; 045import org.nuxeo.common.Environment; 046import org.nuxeo.common.utils.ExceptionUtils; 047import org.nuxeo.common.utils.ZipUtils; 048import org.nuxeo.ecm.automation.AutomationService; 049import org.nuxeo.ecm.automation.OperationContext; 050import org.nuxeo.ecm.core.api.Blob; 051import org.nuxeo.ecm.core.api.Blobs; 052import org.nuxeo.ecm.core.api.CoreSession; 053import org.nuxeo.ecm.core.api.DocumentModel; 054import org.nuxeo.ecm.core.api.NuxeoException; 055import org.nuxeo.ecm.core.api.model.Property; 056import org.nuxeo.ecm.core.api.model.impl.primitives.BlobProperty; 057import org.nuxeo.ecm.core.schema.types.ListType; 058import org.nuxeo.runtime.api.Framework; 059 060/** 061 * Main implementation class for delivering the Import logic 062 * 063 * @author <a href="mailto:tdelprat@nuxeo.com">Tiry</a> 064 */ 065public class XMLImporterServiceImpl { 066 067 protected static final String FILE_PROPERTY = "file"; 068 069 protected static final String CONTENT_PROPERTY = "content"; 070 071 protected static final String MIME_TYPE_PROPERTY = "mimetype"; 072 073 protected static final String FILE_NAME_PROPERTY = "filename"; 074 075 private static final String MSG_NO_ELEMENT_FOUND = "**CREATION**\n" 076 + "No element \"%s\" found in %s, use the DOC_TYPE-INDEX value"; 077 078 private static final String MSG_CREATION = "**CREATION**\n" 079 + "Try to create document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n"; 080 081 private static final String MSG_UPDATE = "**DOCUMENT UPDATE**\n" 082 + "Try to update document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n"; 083 084 private static final String MSG_UPDATE_PROPERTY_TRACE = "**PROPERTY UPDATE**\n" 085 + "Value found for %s in %s is \"%s\". With the following conf: %s"; 086 087 private static final String MSG_UPDATE_PROPERTY = "**PROPERTY UPDATE**\n" 088 + "Try to set value into %s property based on %s element on document \"%s\" (%s). Conf activated: %s"; 089 090 public static final Log log = LogFactory.getLog(XMLImporterServiceImpl.class); 091 092 public static final String XML_IMPORTER_INITIALIZATION = "org.nuxeo.xml.importer.initialization"; 093 094 protected CoreSession session; 095 096 protected DocumentModel rootDoc; 097 098 protected Stack<DocumentModel> docsStack; 099 100 protected Map<String, List<String>> deletedAttributes = new HashMap<>(); 101 102 protected Map<String, Object> mvelCtx = new HashMap<>(); 103 104 protected Map<Element, DocumentModel> elToDoc = new HashMap<>(); 105 106 protected ParserConfigRegistry registry; 107 108 protected Boolean deferSave = false; 109 110 public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry) { 111 this(rootDoc, registry, null, false); 112 } 113 114 public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry, Map<String, Object> mvelContext, 115 boolean deferSave) { 116 if (mvelContext != null) { 117 mvelCtx.putAll(mvelContext); 118 } 119 120 session = rootDoc.getCoreSession(); 121 this.rootDoc = rootDoc; 122 this.deferSave = deferSave; 123 124 docsStack = new Stack<>(); 125 pushInStack(rootDoc); 126 mvelCtx.put("root", rootDoc); 127 mvelCtx.put("docs", docsStack); 128 mvelCtx.put("session", session); 129 130 this.registry = registry; 131 } 132 133 protected ParserConfigRegistry getRegistry() { 134 return registry; 135 } 136 137 protected DocConfigDescriptor getDocCreationConfig(Element el) { 138 for (DocConfigDescriptor conf : getRegistry().getDocCreationConfigs()) { 139 // direct tagName match 140 if (conf.getTagName().equals(el.getName())) { 141 return conf; 142 } else { 143 // try xpath match 144 try { 145 if (el.matches(conf.getTagName())) { 146 return conf; 147 } 148 } catch (InvalidXPathException e) { 149 // NOP 150 } 151 } 152 } 153 return null; 154 } 155 156 protected List<AttributeConfigDescriptor> getAttributConfigs(Element el) { 157 List<AttributeConfigDescriptor> result = new ArrayList<>(); 158 for (AttributeConfigDescriptor conf : getRegistry().getAttributConfigs()) { 159 if (conf.getTagName().equals(el.getName())) { 160 result.add(conf); 161 } else { 162 // try xpath match 163 try { 164 if (el.matches(conf.getTagName())) { 165 result.add(conf); 166 } 167 } catch (InvalidXPathException e) { 168 // NOP 169 } 170 } 171 } 172 return result; 173 } 174 175 protected File workingDirectory; 176 177 public List<DocumentModel> parse(InputStream is) throws IOException { 178 mvelCtx.put("source", is); 179 try { 180 Document doc; 181 doc = new SAXReader().read(is); 182 workingDirectory = null; 183 return parse(doc); 184 } catch (DocumentException e) { 185 throw new IOException(e); 186 } 187 } 188 189 public List<DocumentModel> parse(File file) throws IOException { 190 mvelCtx.put("source", file); 191 192 Document doc = null; 193 File directory = null; 194 try { 195 doc = new SAXReader().read(file); 196 workingDirectory = file.getParentFile(); 197 } catch (DocumentException e) { 198 File tmp = Environment.getDefault().getTemp(); 199 directory = new File(tmp, file.getName() + System.currentTimeMillis()); 200 directory.mkdir(); 201 ZipUtils.unzip(file, directory); 202 for (File child : directory.listFiles()) { 203 if (child.getName().endsWith(".xml")) { 204 return parse(child); 205 } 206 } 207 throw new NuxeoException("Can not find XML file inside the zip archive", e); 208 } finally { 209 FileUtils.deleteQuietly(directory); 210 } 211 return parse(doc); 212 } 213 214 public List<DocumentModel> parse(Document doc) { 215 Element root = doc.getRootElement(); 216 elToDoc = new HashMap<>(); 217 mvelCtx.put("xml", doc); 218 mvelCtx.put("map", elToDoc); 219 process(root); 220 221 // defer saveDocument to end of operation 222 if (deferSave) { 223 ArrayList<DocumentModel> a = new ArrayList<>(); 224 DocumentModel d = null; 225 while (docsStack.size() > 0) { 226 d = popStack(); 227 d.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE); 228 d = session.saveDocument(d); 229 a.add(d); 230 } 231 return a; 232 } else { 233 return new ArrayList<>(docsStack); 234 } 235 } 236 237 protected Object resolveComplex(Element el, AttributeConfigDescriptor conf) { 238 Map<String, Object> propValue = new HashMap<>(); 239 for (String name : conf.getMapping().keySet()) { 240 propValue.put(name, resolveAndEvaluateXmlNode(el, conf.getMapping().get(name))); 241 } 242 243 return propValue; 244 } 245 246 protected Blob resolveBlob(Element el, AttributeConfigDescriptor conf, String propertyName) { 247 @SuppressWarnings("unchecked") 248 Map<String, Object> propValues = (Map<String, Object>) resolveComplex(el, conf); 249 250 if (propValues.containsKey(propertyName)) { 251 try { 252 Blob blob = null; 253 String content = (String) propValues.get(propertyName); 254 if (content != null && workingDirectory != null) { 255 File file = new File(workingDirectory, content.trim()); 256 if (file.exists()) { 257 blob = Blobs.createBlob(file); 258 } 259 } 260 if (blob == null && content != null) { 261 blob = Blobs.createBlob(content); 262 } 263 264 if (blob != null) { 265 if (propValues.containsKey(MIME_TYPE_PROPERTY)) { 266 blob.setMimeType((String) propValues.get(MIME_TYPE_PROPERTY)); 267 } 268 if (propValues.containsKey(FILE_NAME_PROPERTY)) { 269 blob.setFilename((String) propValues.get(FILE_NAME_PROPERTY)); 270 } 271 } 272 273 return blob; 274 } catch (IOException e) { 275 throw new RuntimeException(e); 276 } 277 } 278 return null; 279 } 280 281 @SuppressWarnings("unchecked") 282 protected void processDocAttributes(DocumentModel doc, Element el, AttributeConfigDescriptor conf) { 283 String targetDocProperty = conf.getTargetDocProperty(); 284 285 if (log.isDebugEnabled()) { 286 log.debug(String.format(MSG_UPDATE_PROPERTY, targetDocProperty, el.getUniquePath(), doc.getPathAsString(), 287 doc.getType(), conf.toString())); 288 } 289 Property property = doc.getProperty(targetDocProperty); 290 291 if (property.isScalar()) { 292 Object value = resolveAndEvaluateXmlNode(el, conf.getSingleXpath()); 293 if (log.isTraceEnabled()) { 294 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 295 conf.toString())); 296 } 297 property.setValue(value); 298 299 } else if (property.isComplex()) { 300 301 if (property instanceof BlobProperty) { 302 Object value = resolveBlob(el, conf, CONTENT_PROPERTY); 303 if (log.isTraceEnabled()) { 304 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 305 conf.toString())); 306 } 307 property.setValue(value); 308 } else { 309 Object value = resolveComplex(el, conf); 310 if (log.isTraceEnabled()) { 311 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 312 conf.toString())); 313 } 314 property.setValue(value); 315 } 316 317 } else if (property.isList()) { 318 319 ListType lType = (ListType) property.getType(); 320 321 Serializable value; 322 323 if (lType.getFieldType().isSimpleType()) { 324 value = (Serializable) resolveAndEvaluateXmlNode(el, conf.getSingleXpath()); 325 if (value != null) { 326 Object values = property.getValue(); 327 if (values == null) { 328 property.setValue(new Object[] { value }); 329 } else if (values instanceof Object[]) { 330 int len = ((Object[]) values).length; 331 Object[] newValues = new Object[len + 1]; 332 System.arraycopy(values, 0, newValues, 0, len); 333 newValues[len] = value; 334 property.setValue(newValues); 335 } else { 336 log.error("Simple multi value property " + targetDocProperty + " is not an Array"); 337 } 338 } 339 } else { 340 Map<String, Object> props = (Map<String, Object>) resolveComplex(el, conf); 341 if (props.containsKey(FILE_PROPERTY)) { 342 Blob blob = resolveBlob(el, conf, FILE_PROPERTY); 343 props.put(FILE_PROPERTY, blob); 344 } 345 property.addValue(props); 346 value = (Serializable) props; 347 } 348 349 if (log.isTraceEnabled()) { 350 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 351 conf.toString())); 352 } 353 } 354 } 355 356 protected Map<String, Object> getMVELContext(Element el) { 357 mvelCtx.put("currentDocument", docsStack.peek()); 358 mvelCtx.put("currentElement", el); 359 mvelCtx.put("Fn", new MVELImporterFunction(session, docsStack, elToDoc, el)); 360 return mvelCtx; 361 } 362 363 protected Object resolve(Element el, String xpr) { 364 if (xpr == null) { 365 return null; 366 } 367 368 if (xpr.startsWith("#{") && xpr.endsWith("}")) { // MVEL 369 xpr = xpr.substring(2, xpr.length() - 1); 370 return resolveMVEL(el, xpr); 371 } else if (xpr.contains("{{")) { // String containing XPaths 372 StringBuilder sb = new StringBuilder(); 373 int idx = xpr.indexOf("{{"); 374 while (idx >= 0) { 375 int idx2 = xpr.indexOf("}}", idx); 376 if (idx2 > 0) { 377 sb.append(xpr.substring(0, idx)); 378 String xpath = xpr.substring(idx + 2, idx2); 379 sb.append(resolveAndEvaluateXmlNode(el, xpath)); 380 xpr = xpr.substring(idx2); 381 } else { 382 sb.append(xpr); 383 xpr = ""; 384 } 385 idx = xpr.indexOf("{{"); 386 } 387 return sb.toString(); 388 } else { 389 return resolveXP(el, xpr); // default to pure XPATH 390 } 391 } 392 393 protected Object resolveMVEL(Element el, String xpr) { 394 Map<String, Object> ctx = new HashMap<>(getMVELContext(el)); 395 Serializable compiled = MVEL.compileExpression(xpr); 396 return MVEL.executeExpression(compiled, ctx); 397 } 398 399 protected Object resolveXP(Element el, String xpr) { 400 List<Node> nodes = el.selectNodes(xpr); 401 if (nodes.size() == 1) { 402 return nodes.get(0); 403 } else if (nodes.size() > 1) { 404 // Workaround for NXP-11834 405 if (xpr.endsWith("text()")) { 406 String value = ""; 407 for (Node node : nodes) { 408 if (!(node instanceof DefaultText)) { 409 String msg = "Text selector must return a string (expr:\"%s\") element %s"; 410 log.error(String.format(msg, xpr, el.getStringValue())); 411 return value; 412 } 413 value += ((DefaultText) node).getText(); 414 } 415 return new DefaultText(value); 416 } 417 return nodes; 418 } 419 return null; 420 } 421 422 protected String resolvePath(Element el, String xpr) { 423 Object ob = resolve(el, xpr); 424 if (ob == null) { 425 for (int i = 0; i < docsStack.size(); i++) { 426 if (docsStack.get(i).isFolder()) { 427 return docsStack.get(i).getPathAsString(); 428 } 429 } 430 } else { 431 if (ob instanceof DocumentModel) { 432 return ((DocumentModel) ob).getPathAsString(); 433 } else if (ob instanceof Node) { 434 if (ob instanceof Element) { 435 Element targetElement = (Element) ob; 436 DocumentModel target = elToDoc.get(targetElement); 437 if (target != null) { 438 return target.getPathAsString(); 439 } else { 440 return targetElement.getText(); 441 } 442 } else if (ob instanceof Attribute) { 443 return ((Attribute) ob).getValue(); 444 } else if (ob instanceof Text) { 445 return ((Text) ob).getText(); 446 } else if (ob.getClass().isAssignableFrom(Attribute.class)) { 447 return ((Attribute) ob).getValue(); 448 } 449 } else { 450 return ob.toString(); 451 } 452 } 453 return rootDoc.getPathAsString(); 454 } 455 456 protected String resolveName(Element el, String xpr) { 457 Object ob = resolveAndEvaluateXmlNode(el, xpr); 458 if (ob == null) { 459 return null; 460 } 461 return ob.toString(); 462 } 463 464 protected Object resolveAndEvaluateXmlNode(Element el, String xpr) { 465 Object ob = resolve(el, xpr); 466 if (ob == null) { 467 return null; 468 } 469 if (ob instanceof Node) { 470 return ((Node) ob).getText(); 471 } else { 472 return ob; 473 } 474 } 475 476 protected void createNewDocument(Element el, DocConfigDescriptor conf) { 477 DocumentModel doc = session.createDocumentModel(conf.getDocType()); 478 479 String path = resolvePath(el, conf.getParent()); 480 Object nameOb = resolveName(el, conf.getName()); 481 String name = null; 482 if (nameOb == null) { 483 if (log.isDebugEnabled()) { 484 log.debug(String.format(MSG_NO_ELEMENT_FOUND, conf.getName(), el.getUniquePath())); 485 } 486 int idx = 1; 487 for (int i = 0; i < docsStack.size(); i++) { 488 if (docsStack.get(i).getType().equals(conf.getDocType())) { 489 idx++; 490 } 491 } 492 name = conf.getDocType() + "-" + idx; 493 } else { 494 name = nameOb.toString(); 495 } 496 doc.setPathInfo(path, name); 497 498 if (log.isDebugEnabled()) { 499 if (conf.getUpdate()) { 500 log.debug(String.format(MSG_UPDATE, path, name, el.getUniquePath(), conf.toString())); 501 } else { 502 log.debug(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString())); 503 } 504 } 505 506 try { 507 if (conf.getUpdate() && session.exists(doc.getRef())) { 508 DocumentModel existingDoc = session.getDocument(doc.getRef()); 509 510 // get attributes, if attribute needs to be overwritten, empty in the document 511 for (Object e : el.elements()) { 512 List<AttributeConfigDescriptor> configs = getAttributConfigs((Element) e); 513 if (configs != null) { 514 if (!deletedAttributes.containsKey(existingDoc.getId())) { 515 deletedAttributes.put(existingDoc.getId(), new ArrayList<String>()); 516 } 517 for (AttributeConfigDescriptor config : configs) { 518 String targetDocProperty = config.getTargetDocProperty(); 519 // check deletedAttributes for attribute which should be overwritten 520 // if it is there, don't empty it a second time 521 if (config.overwrite 522 && !deletedAttributes.get(existingDoc.getId()).contains(targetDocProperty)) { 523 deletedAttributes.get(existingDoc.getId()).add(targetDocProperty); 524 existingDoc.setPropertyValue(targetDocProperty, new ArrayList<>()); 525 } 526 } 527 } 528 } 529 doc = existingDoc; 530 } else { 531 doc = session.createDocument(doc); 532 } 533 } catch (NuxeoException e) { 534 e.addInfo(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString())); 535 throw e; 536 } 537 pushInStack(doc); 538 elToDoc.put(el, doc); 539 } 540 541 protected void process(Element el) { 542 DocConfigDescriptor createConf = getDocCreationConfig(el); 543 if (createConf != null) { 544 createNewDocument(el, createConf); 545 } 546 List<AttributeConfigDescriptor> configs = getAttributConfigs(el); 547 if (configs != null) { 548 for (AttributeConfigDescriptor config : configs) { 549 processDocAttributes(docsStack.peek(), el, config); 550 } 551 552 DocumentModel doc = popStack(); 553 doc.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE); 554 if (!deferSave) { 555 doc = session.saveDocument(doc); 556 } 557 pushInStack(doc); 558 559 if (createConf != null) { 560 String chain = createConf.getAutomationChain(); 561 if (chain != null && !"".equals(chain.trim())) { 562 try (OperationContext ctx = new OperationContext(session)) { 563 ctx.putAll(mvelCtx); 564 ctx.setInput(docsStack.peek()); 565 Framework.getService(AutomationService.class).run(ctx, chain); 566 } catch (NuxeoException e) { 567 throw e; 568 } catch (Exception e) { 569 ExceptionUtils.checkInterrupt(e); 570 } 571 } 572 } 573 } 574 for (Object e : el.elements()) { 575 process((Element) e); 576 } 577 } 578 579 private void pushInStack(DocumentModel doc) { 580 mvelCtx.put("changeableDocument", doc); 581 docsStack.push(doc); 582 } 583 584 private DocumentModel popStack() { 585 DocumentModel doc = docsStack.pop(); 586 mvelCtx.put("changeableDocument", doc); 587 return doc; 588 } 589 590}