/*
 * (C) Copyright 2002-2015 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 */

package org.nuxeo.ecm.platform.importer.xml.parser;

import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.InvalidXPathException;
import org.dom4j.Node;
import org.dom4j.Text;
import org.dom4j.io.SAXReader;
import org.dom4j.tree.DefaultText;
import org.mvel2.MVEL;
import org.nuxeo.common.Environment;
import org.nuxeo.common.utils.ExceptionUtils;
import org.nuxeo.common.utils.ZipUtils;
import org.nuxeo.ecm.automation.AutomationService;
import org.nuxeo.ecm.automation.OperationContext;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.NuxeoException;
import org.nuxeo.ecm.core.api.model.Property;
import org.nuxeo.ecm.core.api.model.impl.primitives.BlobProperty;
import org.nuxeo.ecm.core.schema.types.ListType;
import org.nuxeo.runtime.api.Framework;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

/**
 * Main implementation class for delivering the Import logic.
 * <p>
 * An instance walks a dom4j XML tree depth-first. For every element it looks up a matching document-creation
 * configuration and the matching attribute configurations in the {@link ParserConfigRegistry}, creates or updates
 * documents through the {@link CoreSession} of the root document, and fills their properties from values resolved
 * by XPath, MVEL or <code>{{xpath}}</code> templates.
 * <p>
 * The instance keeps mutable state (document stack, MVEL context, element-to-document map) in plain, unsynchronized
 * fields.
 *
 * @author <a href="mailto:tdelprat@nuxeo.com">Tiry</a>
 */
public class XMLImporterServiceImpl {

    protected static final String FILE_PROPERTY = "file";

    protected static final String CONTENT_PROPERTY = "content";

    protected static final String MIME_TYPE_PROPERTY = "mimetype";

    protected static final String FILE_NAME_PROPERTY = "filename";

    private static final String MSG_NO_ELEMENT_FOUND = "**CREATION**\n"
            + "No element \"%s\" found in %s, use the DOC_TYPE-INDEX value";

    private static final String MSG_CREATION = "**CREATION**\n"
            + "Try to create document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";

    private static final String MSG_UPDATE = "**DOCUMENT UPDATE**\n"
            + "Try to update document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";

    private static final String MSG_UPDATE_PROPERTY_TRACE = "**PROPERTY UPDATE**\n"
            + "Value found for %s in %s is \"%s\". With the following conf: %s";

    private static final String MSG_UPDATE_PROPERTY = "**PROPERTY UPDATE**\n"
            + "Try to set value into %s property based on %s element on document \"%s\" (%s). Conf activated: %s";

    public static final Log log = LogFactory.getLog(XMLImporterServiceImpl.class);

    /** Context-data key set to {@code Boolean.TRUE} on every document just before it is saved by the importer. */
    public static final String XML_IMPORTER_INITIALIZATION = "org.nuxeo.xml.importer.initialization";

    protected CoreSession session;

    protected DocumentModel rootDoc;

    /** Documents created or reused so far; the top of the stack is the "current" document. */
    protected Stack<DocumentModel> docsStack;

    /** For each existing document id, the properties that were already emptied because of an "overwrite" config. */
    protected Map<String, List<String>> deletedAttributes = new HashMap<>();

    /** Variables exposed to MVEL expressions and to automation chains. */
    protected Map<String, Object> mvelCtx = new HashMap<>();

    /** Maps each XML element that produced a document to that document. */
    protected Map<Element, DocumentModel> elToDoc = new HashMap<>();

    protected ParserConfigRegistry registry;

    protected Boolean deferSave = false;

    /** Directory used to resolve relative blob file names; {@code null} when parsing from a stream. */
    protected File workingDirectory;

    private AutomationService automationService;

    /**
     * Creates an importer with no extra MVEL variables and immediate saving of documents.
     *
     * @param rootDoc the document below which content is imported; its session is used for all operations
     * @param registry the source of document and attribute configurations
     */
    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry) {
        this(rootDoc, registry, null, false);
    }

    /**
     * Creates an importer.
     *
     * @param rootDoc the document below which content is imported; its session is used for all operations
     * @param registry the source of document and attribute configurations
     * @param mvelContext extra variables copied into the MVEL context, may be {@code null}
     * @param deferSave when {@code true}, documents are saved only at the end of {@link #parse(Document)}
     */
    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry,
            Map<String, Object> mvelContext, boolean deferSave) {
        if (mvelContext != null) {
            mvelCtx.putAll(mvelContext);
        }

        session = rootDoc.getCoreSession();
        this.rootDoc = rootDoc;
        this.deferSave = deferSave;

        docsStack = new Stack<>();
        pushInStack(rootDoc);
        mvelCtx.put("root", rootDoc);
        mvelCtx.put("docs", docsStack);
        mvelCtx.put("session", session);

        this.registry = registry;
    }

    protected ParserConfigRegistry getRegistry() {
        return registry;
    }

    /**
     * Finds the first document-creation configuration matching the given element.
     * <p>
     * A configuration matches when its tag name equals the element name or, failing that, when the tag name
     * evaluated as an XPath pattern matches the element.
     *
     * @param el the element being processed
     * @return the first matching configuration, or {@code null} if none matches
     */
    protected DocConfigDescriptor getDocCreationConfig(Element el) {
        for (DocConfigDescriptor conf : getRegistry().getDocCreationConfigs()) {
            // direct tagName match
            if (conf.getTagName().equals(el.getName())) {
                return conf;
            }
            // try xpath match
            try {
                if (el.matches(conf.getTagName())) {
                    return conf;
                }
            } catch (InvalidXPathException ignored) {
                // the tag name is not a valid XPath pattern: treat it as "no match"
            }
        }
        return null;
    }

    /**
     * Collects every attribute configuration matching the given element, using the same name-then-XPath matching
     * as {@link #getDocCreationConfig(Element)}.
     *
     * @param el the element being processed
     * @return the matching configurations, possibly empty, never {@code null}
     */
    protected List<AttributeConfigDescriptor> getAttributConfigs(Element el) {
        List<AttributeConfigDescriptor> result = new ArrayList<>();
        for (AttributeConfigDescriptor conf : getRegistry().getAttributConfigs()) {
            if (conf.getTagName().equals(el.getName())) {
                result.add(conf);
                continue;
            }
            // try xpath match
            try {
                if (el.matches(conf.getTagName())) {
                    result.add(conf);
                }
            } catch (InvalidXPathException ignored) {
                // the tag name is not a valid XPath pattern: treat it as "no match"
            }
        }
        return result;
    }

    /**
     * Parses XML read from a stream. No working directory is set, so blob values are never resolved as files.
     *
     * @param is the XML input; also exposed to MVEL as {@code source}
     * @return the documents produced by {@link #parse(Document)}
     * @throws IOException if the stream cannot be read as XML (wraps the {@link DocumentException})
     */
    public List<DocumentModel> parse(InputStream is) throws IOException {
        mvelCtx.put("source", is);
        try {
            // NOTE(review): SAXReader is used with its default configuration; if the input is untrusted,
            // confirm that DTD / external entity processing is disabled where this runs.
            Document doc = new SAXReader().read(is);
            workingDirectory = null;
            return parse(doc);
        } catch (DocumentException e) {
            throw new IOException(e);
        }
    }

    /**
     * Parses an XML file, or a zip archive containing one.
     * <p>
     * The file is first read as XML, with its parent directory as working directory. If that fails, the file is
     * unzipped into a new directory under the environment temp directory and the first top-level entry whose name
     * ends with {@code .xml} is parsed instead. The unzip directory is deleted before returning.
     *
     * @param file the XML file or zip archive; also exposed to MVEL as {@code source}
     * @return the documents produced by {@link #parse(Document)}
     * @throws IOException if unzipping fails
     * @throws NuxeoException if the archive holds no top-level {@code .xml} file
     */
    public List<DocumentModel> parse(File file) throws IOException {
        mvelCtx.put("source", file);

        Document doc = null;
        File directory = null;
        try {
            doc = new SAXReader().read(file);
            workingDirectory = file.getParentFile();
        } catch (DocumentException e) {
            File tmp = Environment.getDefault().getTemp();
            directory = new File(tmp, file.getName() + System.currentTimeMillis());
            directory.mkdir();
            ZipUtils.unzip(file, directory);
            // listFiles() returns null when the directory cannot be listed (e.g. it was never created)
            File[] children = directory.listFiles();
            if (children != null) {
                for (File child : children) {
                    if (child.getName().endsWith(".xml")) {
                        return parse(child);
                    }
                }
            }
            throw new NuxeoException("Can not find XML file inside the zip archive", e);
        } finally {
            FileUtils.deleteQuietly(directory);
        }
        return parse(doc);
    }

    /**
     * Processes a parsed XML document starting at its root element.
     * <p>
     * When saving is deferred, every document on the stack is popped, flagged with
     * {@link #XML_IMPORTER_INITIALIZATION} and saved, and the saved documents are returned in pop order (the stack
     * is left empty). Otherwise a copy of the stack content is returned.
     *
     * @param doc the XML document; exposed to MVEL as {@code xml}
     * @return the documents on the stack, including the root document pushed by the constructor
     */
    public List<DocumentModel> parse(Document doc) {
        Element root = doc.getRootElement();
        elToDoc = new HashMap<>();
        mvelCtx.put("xml", doc);
        mvelCtx.put("map", elToDoc);
        process(root);

        if (!deferSave) {
            return new ArrayList<>(docsStack);
        }
        // defer saveDocument to end of operation
        List<DocumentModel> saved = new ArrayList<>();
        while (!docsStack.isEmpty()) {
            DocumentModel current = popStack();
            current.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
            saved.add(session.saveDocument(current));
        }
        return saved;
    }

    /**
     * Builds a map by resolving every expression of the configuration mapping against the element.
     *
     * @return a new {@code Map<String, Object>} with one entry per mapping key
     */
    protected Object resolveComplex(Element el, AttributeConfigDescriptor conf) {
        Map<String, Object> propValue = new HashMap<>();
        for (Map.Entry<String, String> mapping : conf.getMapping().entrySet()) {
            propValue.put(mapping.getKey(), resolveAndEvaluateXmlNode(el, mapping.getValue()));
        }
        return propValue;
    }

    /**
     * Builds a blob from the resolved mapping of a configuration.
     * <p>
     * The value under {@code propertyName} is first tried as a file name relative to the working directory; if no
     * such file exists (or there is no working directory) the value itself becomes the blob content. Mime type and
     * file name are then set from the {@value #MIME_TYPE_PROPERTY} and {@value #FILE_NAME_PROPERTY} entries when
     * those keys are present.
     *
     * @param propertyName the mapping key holding the content or the file reference
     * @return the blob, or {@code null} if the key is missing or its value is {@code null}
     */
    protected Blob resolveBlob(Element el, AttributeConfigDescriptor conf, String propertyName) {
        @SuppressWarnings("unchecked")
        Map<String, Object> propValues = (Map<String, Object>) resolveComplex(el, conf);

        if (!propValues.containsKey(propertyName)) {
            return null;
        }
        try {
            Blob blob = null;
            String content = (String) propValues.get(propertyName);
            if (content != null && workingDirectory != null) {
                File file = new File(workingDirectory, content.trim());
                if (file.exists()) {
                    blob = Blobs.createBlob(file);
                }
            }
            if (blob == null && content != null) {
                blob = Blobs.createBlob(content);
            }

            if (blob != null) {
                if (propValues.containsKey(MIME_TYPE_PROPERTY)) {
                    blob.setMimeType((String) propValues.get(MIME_TYPE_PROPERTY));
                }
                if (propValues.containsKey(FILE_NAME_PROPERTY)) {
                    blob.setFilename((String) propValues.get(FILE_NAME_PROPERTY));
                }
            }
            return blob;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Sets one document property from the element according to the configuration.
     * <ul>
     * <li>scalar property: the value of the single XPath;</li>
     * <li>blob property: the blob built from the {@value #CONTENT_PROPERTY} mapping;</li>
     * <li>other complex property: the resolved mapping;</li>
     * <li>list of simple values: the resolved value is appended to the existing array;</li>
     * <li>list of complex values: the resolved mapping is added, its {@value #FILE_PROPERTY} entry replaced by a
     * blob when present.</li>
     * </ul>
     */
    @SuppressWarnings("unchecked")
    protected void processDocAttributes(DocumentModel doc, Element el, AttributeConfigDescriptor conf) {
        String targetDocProperty = conf.getTargetDocProperty();

        if (log.isDebugEnabled()) {
            log.debug(String.format(MSG_UPDATE_PROPERTY, targetDocProperty, el.getUniquePath(), doc.getPathAsString(),
                    doc.getType(), conf.toString()));
        }
        Property property = doc.getProperty(targetDocProperty);

        if (property.isScalar()) {
            Object value = resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
            traceProperty(targetDocProperty, el, value, conf);
            property.setValue(value);

        } else if (property.isComplex()) {
            Object value;
            if (property instanceof BlobProperty) {
                value = resolveBlob(el, conf, CONTENT_PROPERTY);
            } else {
                value = resolveComplex(el, conf);
            }
            traceProperty(targetDocProperty, el, value, conf);
            property.setValue(value);

        } else if (property.isList()) {
            ListType lType = (ListType) property.getType();
            Serializable value;

            if (lType.getFieldType().isSimpleType()) {
                value = (Serializable) resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
                if (value != null) {
                    Object values = property.getValue();
                    if (values == null) {
                        property.setValue(new Object[] { value });
                    } else if (values instanceof Object[]) {
                        // append to a copy: the current array is not grown in place
                        int len = ((Object[]) values).length;
                        Object[] newValues = new Object[len + 1];
                        System.arraycopy(values, 0, newValues, 0, len);
                        newValues[len] = value;
                        property.setValue(newValues);
                    } else {
                        log.error("Simple multi value property " + targetDocProperty + " is not an Array");
                    }
                }
            } else {
                Map<String, Object> props = (Map<String, Object>) resolveComplex(el, conf);
                if (props.containsKey(FILE_PROPERTY)) {
                    Blob blob = resolveBlob(el, conf, FILE_PROPERTY);
                    props.put(FILE_PROPERTY, blob);
                }
                property.addValue(props);
                value = (Serializable) props;
            }

            traceProperty(targetDocProperty, el, value, conf);
        }
    }

    /** Logs, at trace level, the value resolved for a property. */
    private void traceProperty(String targetDocProperty, Element el, Object value, AttributeConfigDescriptor conf) {
        if (log.isTraceEnabled()) {
            log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
                    conf.toString()));
        }
    }

    /**
     * Refreshes the per-element entries ({@code currentDocument}, {@code currentElement}, {@code Fn}) of the shared
     * MVEL context and returns that shared map.
     */
    protected Map<String, Object> getMVELContext(Element el) {
        mvelCtx.put("currentDocument", docsStack.peek());
        mvelCtx.put("currentElement", el);
        mvelCtx.put("Fn", new MVELImporterFunction(session, docsStack, elToDoc, el));
        return mvelCtx;
    }

    /**
     * Resolves an expression against an element. Three syntaxes are recognized:
     * <ul>
     * <li><code>#{...}</code>: the content is evaluated as MVEL;</li>
     * <li>a string containing <code>{{xpath}}</code> placeholders: each placeholder is replaced by the text value
     * of its XPath and the literal text around the placeholders is kept. An unterminated placeholder is copied
     * verbatim;</li>
     * <li>anything else: evaluated as a plain XPath.</li>
     * </ul>
     *
     * @param el the context element
     * @param xpr the expression, may be {@code null}
     * @return the resolved object, or {@code null} if {@code xpr} is {@code null} or the XPath selects nothing
     */
    protected Object resolve(Element el, String xpr) {
        if (xpr == null) {
            return null;
        }

        if (xpr.startsWith("#{") && xpr.endsWith("}")) { // MVEL
            return resolveMVEL(el, xpr.substring(2, xpr.length() - 1));
        }
        if (!xpr.contains("{{")) {
            return resolveXP(el, xpr); // default to pure XPATH
        }

        // String containing XPaths
        StringBuilder sb = new StringBuilder();
        int cursor = 0; // start of the text not yet copied to the output
        int open = xpr.indexOf("{{");
        while (open >= 0) {
            int close = xpr.indexOf("}}", open + 2);
            if (close < 0) {
                break; // unterminated placeholder: the remainder is emitted as is below
            }
            sb.append(xpr, cursor, open);
            sb.append(resolveAndEvaluateXmlNode(el, xpr.substring(open + 2, close)));
            cursor = close + 2; // skip the closing braces so they do not leak into the output
            open = xpr.indexOf("{{", cursor);
        }
        // keep whatever follows the last placeholder
        sb.append(xpr.substring(cursor));
        return sb.toString();
    }

    /**
     * Compiles and executes an MVEL expression against a copy of the MVEL context for the element.
     */
    protected Object resolveMVEL(Element el, String xpr) {
        Map<String, Object> ctx = new HashMap<>(getMVELContext(el));
        Serializable compiled = MVEL.compileExpression(xpr);
        return MVEL.executeExpression(compiled, ctx);
    }

    /**
     * Evaluates an XPath against an element.
     *
     * @return {@code null} when nothing is selected, the node itself when exactly one is selected, otherwise the
     *         list of nodes; except for expressions ending with {@code text()}, whose text nodes are concatenated
     *         into a single {@link DefaultText}
     */
    protected Object resolveXP(Element el, String xpr) {
        List<Object> nodes = el.selectNodes(xpr);
        if (nodes.size() == 1) {
            return nodes.get(0);
        }
        if (nodes.isEmpty()) {
            return null;
        }
        // Workaround for NXP-11834
        if (xpr.endsWith("text()")) {
            StringBuilder value = new StringBuilder();
            for (Object node : nodes) {
                if (!(node instanceof DefaultText)) {
                    String msg = "Text selector must return a string (expr:\"%s\") element %s";
                    log.error(String.format(msg, xpr, el.getStringValue()));
                    // return what was gathered so far, as a plain String
                    return value.toString();
                }
                value.append(((DefaultText) node).getText());
            }
            return new DefaultText(value.toString());
        }
        return nodes;
    }

    /**
     * Resolves the parent path for a document to create.
     * <p>
     * When the expression resolves to nothing, the path of the first folderish document found scanning the stack
     * from its bottom is used. A resolved {@link DocumentModel} gives its own path; a resolved {@link Element}
     * gives the path of the document created from it, or its text if there is none; attributes and text nodes
     * give their value; any other non-node object gives its {@code toString()}. In all remaining cases the root
     * document path is returned.
     */
    protected String resolvePath(Element el, String xpr) {
        Object ob = resolve(el, xpr);
        if (ob == null) {
            for (DocumentModel candidate : docsStack) {
                if (candidate.isFolder()) {
                    return candidate.getPathAsString();
                }
            }
        } else if (ob instanceof DocumentModel) {
            return ((DocumentModel) ob).getPathAsString();
        } else if (ob instanceof Element) {
            Element targetElement = (Element) ob;
            DocumentModel target = elToDoc.get(targetElement);
            return target != null ? target.getPathAsString() : targetElement.getText();
        } else if (ob instanceof Attribute) {
            return ((Attribute) ob).getValue();
        } else if (ob instanceof Text) {
            return ((Text) ob).getText();
        } else if (!(ob instanceof Node)) {
            return ob.toString();
        }
        return rootDoc.getPathAsString();
    }

    /**
     * Resolves an expression to a document name.
     *
     * @return the string form of the resolved value, or {@code null} if nothing is resolved
     */
    protected String resolveName(Element el, String xpr) {
        Object ob = resolveAndEvaluateXmlNode(el, xpr);
        return ob == null ? null : ob.toString();
    }

    /**
     * Resolves an expression and, when the result is a dom4j {@link Node}, returns its text instead of the node.
     */
    protected Object resolveAndEvaluateXmlNode(Element el, String xpr) {
        Object ob = resolve(el, xpr);
        if (ob instanceof Node) {
            return ((Node) ob).getText();
        }
        return ob;
    }

    /**
     * Creates the document described by the configuration for the element, or reuses the existing one when the
     * configuration asks for an update and a document already exists at the computed path. The resulting document
     * is pushed on the stack and registered for the element.
     * <p>
     * When no name can be resolved, the name is the document type followed by {@code -} and one plus the number
     * of documents of that type currently on the stack.
     * <p>
     * On update, every property targeted by an "overwrite" attribute configuration of a child element is reset to
     * an empty list, once per existing document.
     *
     * @throws NuxeoException if creation or lookup fails; the creation message is added to the exception
     */
    protected void createNewDocument(Element el, DocConfigDescriptor conf) {
        DocumentModel doc = session.createDocumentModel(conf.getDocType());

        String path = resolvePath(el, conf.getParent());
        String name = resolveName(el, conf.getName());
        if (name == null) {
            if (log.isDebugEnabled()) {
                log.debug(String.format(MSG_NO_ELEMENT_FOUND, conf.getName(), el.getUniquePath()));
            }
            int idx = 1;
            for (DocumentModel stacked : docsStack) {
                if (stacked.getType().equals(conf.getDocType())) {
                    idx++;
                }
            }
            name = conf.getDocType() + "-" + idx;
        }
        doc.setPathInfo(path, name);

        if (log.isDebugEnabled()) {
            String template = conf.getUpdate() ? MSG_UPDATE : MSG_CREATION;
            log.debug(String.format(template, path, name, el.getUniquePath(), conf.toString()));
        }

        try {
            if (conf.getUpdate() && session.exists(doc.getRef())) {
                DocumentModel existingDoc = session.getDocument(doc.getRef());

                // get attributes, if attribute needs to be overwritten, empty in the document
                for (Object e : el.elements()) {
                    List<AttributeConfigDescriptor> configs = getAttributConfigs((Element) e);
                    if (configs == null) {
                        continue;
                    }
                    List<String> alreadyCleared = deletedAttributes.get(existingDoc.getId());
                    if (alreadyCleared == null) {
                        alreadyCleared = new ArrayList<String>();
                        deletedAttributes.put(existingDoc.getId(), alreadyCleared);
                    }
                    for (AttributeConfigDescriptor config : configs) {
                        String targetDocProperty = config.getTargetDocProperty();
                        // check deletedAttributes for attribute which should be overwritten
                        // if it is there, don't empty it a second time
                        if (config.overwrite && !alreadyCleared.contains(targetDocProperty)) {
                            alreadyCleared.add(targetDocProperty);
                            existingDoc.setPropertyValue(targetDocProperty, new ArrayList<>());
                        }
                    }
                }
                doc = existingDoc;
            } else {
                doc = session.createDocument(doc);
            }
        } catch (NuxeoException e) {
            e.addInfo(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
            throw e;
        }
        pushInStack(doc);
        elToDoc.put(el, doc);
    }

    /**
     * Processes an element and then, recursively, its child elements.
     * <p>
     * For the element itself: a document is created if a creation configuration matches; every matching attribute
     * configuration is applied to the document on top of the stack; that document is flagged with
     * {@link #XML_IMPORTER_INITIALIZATION} and saved unless saving is deferred; finally, if a document was created
     * and its configuration names an automation chain, the chain is run with that document as input.
     * <p>
     * A {@link NuxeoException} raised by the chain is propagated. Any other exception is passed to
     * {@code ExceptionUtils.checkInterrupt} and then logged without aborting the import.
     */
    protected void process(Element el) {
        DocConfigDescriptor createConf = getDocCreationConfig(el);
        if (createConf != null) {
            createNewDocument(el, createConf);
        }
        List<AttributeConfigDescriptor> configs = getAttributConfigs(el);
        if (configs != null) {
            for (AttributeConfigDescriptor config : configs) {
                processDocAttributes(docsStack.peek(), el, config);
            }

            // replace the top of the stack with the (possibly saved) instance
            DocumentModel doc = popStack();
            doc.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
            if (!deferSave) {
                doc = session.saveDocument(doc);
            }
            pushInStack(doc);

            if (createConf != null) {
                String chain = createConf.getAutomationChain();
                if (chain != null && !"".equals(chain.trim())) {
                    try (OperationContext ctx = new OperationContext(session)) {
                        ctx.putAll(mvelCtx);
                        ctx.setInput(docsStack.peek());
                        getAutomationService().run(ctx, chain);
                    } catch (NuxeoException e) {
                        throw e;
                    } catch (Exception e) {
                        ExceptionUtils.checkInterrupt(e);
                        // the import goes on, but the failure must not disappear silently
                        log.error("Failed to run automation chain " + chain + " on element " + el.getUniquePath(), e);
                    }
                }
            }
        }
        for (Object e : el.elements()) {
            process((Element) e);
        }
    }

    /** Looks up the automation service on first use and caches it. */
    private AutomationService getAutomationService() {
        if (automationService == null) {
            automationService = Framework.getService(AutomationService.class);
        }
        return automationService;
    }

    /** Pushes a document on the stack and exposes it to MVEL as {@code changeableDocument}. */
    private void pushInStack(DocumentModel doc) {
        mvelCtx.put("changeableDocument", doc);
        docsStack.push(doc);
    }

    /** Pops the top document from the stack and exposes it to MVEL as {@code changeableDocument}. */
    private DocumentModel popStack() {
        DocumentModel doc = docsStack.pop();
        mvelCtx.put("changeableDocument", doc);
        return doc;
    }

}