java批量导入数据到数据库(如何去除重复数据并返回结果)(1)

概述

本篇是 POI系列 的最后一篇。传送门 java 开发中如何用 POI 优雅的导出 Excel 文件, Java 开发中如何用 POI 优雅的导入 Excel 文件.

场景分析

大多数开发中是不需要重复的数据的, 所以后端开发中需要做去重操作, 而且为了更加友好的交互, 我们需要将导入失败的数据返回给用户。一般数据重复有以下几个场景:

  1. Excel 中本身存在重复数据, 即本次导入存在重复数据;
  2. 数据库中已经存在了该条数据, 即历史导入存在重复数据;

为了减轻数据库的压力, 这里在设计中引入缓存 Redis 。

整体思路如下:

  1. bitmap 判断是否存在;
  2. 内存中数据是否重复;
  3. redis 和 mysql 批量插入;
  4. 数据库中插入失败处理;
代码实现

为简化无聊的 CRUD 编写, 引入了 mybatis-plus 的逆向 generator 插件。

pom.xml

<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-parent</artifactId> <version>2.3.0.RELEASE</version> <relativePath/> <!-- lookup parent from repository --> </parent> <groupId>cn.idea360</groupId> <artifactId>idc-mp</artifactId> <version>0.0.1</version> <name>idc-mp</name> <description>Demo project for Spring Boot</description> <properties> <java.version>1.8</java.version> </properties> <dependencies> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-devtools</artifactId> <scope>runtime</scope> <optional>true</optional> </dependency> <dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <optional>true</optional> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-test</artifactId> <scope>test</scope> <exclusions> <exclusion> <groupId>org.junit.vintage</groupId> <artifactId>junit-vintage-engine</artifactId> </exclusion> </exclusions> </dependency> <dependency> <groupId>com.baomidou</groupId> <artifactId>mybatis-plus-boot-starter</artifactId> <version>3.3.1</version> </dependency> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <scope>runtime</scope> </dependency> <!-- https://mvnrepository.com/artifact/com.alibaba/druid --> <dependency> <groupId>com.alibaba</groupId> <artifactId>druid</artifactId> <version>1.1.22</version> </dependency> <!-- https://mvnrepository.com/artifact/com.baomidou/mybatis-plus-generator --> <dependency> <groupId>com.baomidou</groupId> <artifactId>mybatis-plus-generator</artifactId> <version>3.3.1</version> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-freemarker</artifactId> </dependency> <!-- https://mvnrepository.com/artifact/io.springfox/springfox-swagger2 --> <dependency> <groupId>io.springfox</groupId> <artifactId>springfox-swagger2</artifactId> <version>2.9.2</version> </dependency> <!-- https://mvnrepository.com/artifact/io.springfox/springfox-swagger-ui --> <dependency> <groupId>io.springfox</groupId> <artifactId>springfox-swagger-ui</artifactId> <version>2.9.2</version> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-data-redis</artifactId> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-pool2 --> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-pool2</artifactId> <version>2.8.0</version> </dependency> <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson --> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.68</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 --> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> <version>3.10</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-maven-plugin</artifactId> </plugin> </plugins> </build> </project>

application.yml

server: port: 8888 spring: datasource: type: com.alibaba.druid.pool.DruidDataSource driverClassName: com.mysql.cj.jdbc.Driver url: jdbc:mysql://localhost:3306/mp_base?allowMultiQueries=true&useUnicode=true&characterEncoding=UTF-8&useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true username: root password: root redis: database: 0 host: localhost port: 6379 password: # 密码(默认为空) timeout: 6000ms # 连接超时时长(毫秒) lettuce: pool: max-active: 1000 # 连接池最大连接数(使用负值表示没有限制) max-wait: -1ms # 连接池最大阻塞等待时间(使用负值表示没有限制) max-idle: 10 # 连接池中的最大空闲连接 min-idle: 5 # 连接池中的最小空闲连接

mysql-schema

DROP TABLE IF EXISTS user; CREATE TABLE user ( id BIGINT(20) NOT NULL AUTO_INCREMENT COMMENT '主键ID', name VARCHAR(30) DEFAULT NULL UNIQUE COMMENT '姓名', age INT(11) DEFAULT NULL COMMENT '年龄', email VARCHAR(50) DEFAULT NULL COMMENT '邮箱', PRIMARY KEY (id) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;

generator

public class MysqlGenerator { /** * RUN THIS */ public static void main(String[] args) { // 代码生成器 AutoGenerator mpg = new AutoGenerator(); // 全局配置 GlobalConfig gc = new GlobalConfig(); String projectPath = System.getProperty("user.dir"); gc.setOutputDir(projectPath "/idc-mp/src/main/java"); gc.setAuthor("当我遇上你"); gc.setOpen(false); gc.setFileOverride(true);// 是否覆盖文件 gc.setBaseResultMap(true);// XML ResultMap gc.setBaseColumnList(true);// XML columList gc.setDateType(DateType.ONLY_DATE); gc.setSwagger2(true); // 实体属性 Swagger2 注解 gc.setIdType(IdType.AUTO); gc.setMapperName("%sMapper"); gc.setXmlName("%sMapper"); gc.setserviceName("%sService"); gc.setServiceImplName("%sServiceImpl"); gc.setControllerName("%sController"); mpg.setGlobalConfig(gc); // 数据源配置 DataSourceConfig dsc = new DataSourceConfig(); dsc.setUrl("jdbc:mysql://localhost:3306/mp_base?useUnicode=true&serverTimezone=GMT&useSSL=false&characterEncoding=utf8"); // dsc.setSchemaName("public"); dsc.setDriverName("com.mysql.cj.jdbc.Driver"); dsc.setUsername("root"); dsc.setPassword("root"); mpg.setDataSource(dsc); // 包配置 PackageConfig pc = new PackageConfig(); pc.setModuleName("mp"); pc.setParent("cn.idea360.demo.modules"); mpg.setPackageInfo(pc); // 自定义配置 InjectionConfig cfg = new InjectionConfig() { @Override public void initMap() { // to do nothing } }; List<FileOutConfig> focList = new ArrayList<>(); focList.add(new FileOutConfig("/templates/mapper.xml.ftl") { @Override public String outputFile(TableInfo tableInfo) { // 自定义输入文件名称 return projectPath "/idc-mp/src/main/resources/mapper/" pc.getModuleName() "/" tableInfo.getEntityName() "Mapper" StringPool.DOT_XML; } }); cfg.setFileOutConfigList(focList); mpg.setCfg(cfg); mpg.setTemplate(new TemplateConfig().setXml(null)); // 策略配置 StrategyConfig strategy = new StrategyConfig(); strategy.setNaming(NamingStrategy.underline_to_camel); strategy.setColumnNaming(NamingStrategy.underline_to_camel); // strategy.setSuperEntityClass("com.baomidou.mybatisplus.samples.generator.common.BaseEntity"); strategy.setEntityLombokModel(true); // strategy.setSuperControllerClass("com.baomidou.mybatisplus.samples.generator.common.BaseController"); strategy.setInclude(new String[]{"user"}); strategy.setRestControllerStyle(true); strategy.setSuperEntityColumns("id"); strategy.setControllerMappingHyphenStyle(true); // strategy.setTablePrefix(pc.getModuleName() "_"); mpg.setStrategy(strategy); // 选择 freemarker 引擎需要指定如下加,注意 pom 依赖必须有! mpg.setTemplateEngine(new FreemarkerTemplateEngine()); mpg.execute(); } }

Redis

@Configuration public class RedisConfig { @Bean public RedisTemplate<String, Serializable> redisTemplate(LettuceConnectionFactory connectionFactory) { RedisTemplate<String, Serializable> redisTemplate = new RedisTemplate<>(); redisTemplate.setKeySerializer(new StringRedisSerializer()); redisTemplate.setValueSerializer(new GenericJackson2JsonRedisSerializer()); redisTemplate.setConnectionFactory(connectionFactory); return redisTemplate; } // @Bean // public HashOperations<String, String, Object> hashOperations(RedisTemplate<String, Serializable> redisTemplate) { // return redisTemplate.opsForHash(); // } // // @Bean // public ValueOperations<String, Serializable> valueOperations(RedisTemplate<String, Serializable> redisTemplate) { // return redisTemplate.opsForValue(); // } // // @Bean // public ListOperations<String, Serializable> listOperations(RedisTemplate<String, Serializable> redisTemplate) { // return redisTemplate.opsForList(); // } // // @Bean // public SetOperations<String, Serializable> setOperations(RedisTemplate<String, Serializable> redisTemplate) { // return redisTemplate.opsForSet(); // } // // @Bean // public ZSetOperations<String, Serializable> zSetOperations(RedisTemplate<String, Serializable> redisTemplate) { // return redisTemplate.opsForZSet(); // } }

User.java

@Data @EqualsAndHashCode(callSuper = false) @Accessors(chain = true) @ApiModel(value="User对象", description="") public class User implements Serializable { private static final long serialVersionUID = 1L; @TableId(value = "id", type = IdType.AUTO) private Long id; @ApiModelProperty(value = "姓名") private String name; @ApiModelProperty(value = "年龄") private Integer age; @ApiModelProperty(value = "邮箱") private String email; public User(String name) { this.name = name; } /** * 因为会在List中判断user是否存在, 所以需要重写equals和hashCode方法 * @param o * @return */ @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; User user = (User) o; return Objects.equals(name, user.name); } @Override public int hashCode() { return Objects.hash(name); } }

HashUtils.java

public class HashUtils { public static int hash(String data) { return data.hashCode() & Integer.MAX_VALUE; } }

核心逻辑

/** * <p> * 服务实现类 * </p> * * @author 当我遇上你 * @since 2020-05-19 */ @Slf4j @Service public class UserServiceImpl extends ServiceImpl<UserMapper, User> implements UserService { @Autowired private RedisTemplate redisTemplate; /** * 1. bitmap判断是否存在 * 2. 内存中数据是否重复 * 3. redis和mysql批量插入 * 4. 数据库中是否插入失败 * @param list * @return */ @Override public JSONObject importBatch(List<User> list) { if (CollectionUtils.isEmpty(list)) { throw new NullPointerException("数据为空"); } CopyOnWriteArrayList<User> importFailList = new CopyOnWriteArrayList<>(); CopyOnWriteArrayList<User> importSuccessList = new CopyOnWriteArrayList<>(); list.stream().forEach(user -> { Boolean exist = redisTemplate.opsForValue().getBit("user", HashUtils.hash(user.getName())); if (exist) { log.error("Redis中name={}的用户已存在", user.getName()); // 数据已存在,数据放入失败集合 importFailList.add(user); return; } if (importSuccessList.contains(user)) { log.error("内存中name={}的用户已存在", user.getName()); importFailList.add(user); return; } importSuccessList.add(user); }); if (!CollectionUtils.isEmpty(importSuccessList)) { try { // 批量插入数据库 this.saveBatch(importSuccessList); } catch (Exception e) { log.error("MySQL写入冲突:{}", e.getMessage()); iterator<User> iterator = importSuccessList.iterator(); while (iterator.hasNext()) { User user = iterator.next(); if (user.getId() == null) { log.error("MySQL中name={}的用户已存在", user.getName()); importFailList.add(user); importSuccessList.remove(user); } } } // 将导入成功的数据批量写入bitmap redisTemplate.executePipelined(new RedisCallback<String>() { @Override public String doInRedis(RedisConnection redisConnection) throws DataAccessException { importSuccessList.stream().forEach(user -> { redisConnection.setBit("user".getBytes(), HashUtils.hash(user.getName()), true); }); return null; } }); } JSONObject result = new JSONObject(); result.put("success", importSuccessList); result.put("failure", importFailList); return result; } }

场景测试

@Slf4j @SpringBootTest class UserServiceImplTest { @Autowired UserService userService; /** * 模拟内存中存在重复数据 * * 2020-05-19 15:18:10.468 ERROR 6612 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : 内存中name=张三的用户已存在 * 2020-05-19 15:18:10.475 WARN 6612 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : SqlSession [org.apache.ibatis.session.defaults.DefaultSqlSession@304e1e4e] was not registered for synchronization because DataSource is not transactional * 2020-05-19 15:18:10.533 INFO 6612 --- [ main] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited * 2020-05-19 15:18:10.794 INFO 6612 --- [ main] c.i.d.m.m.s.impl.UserServiceImplTest : {"success":[{"id":1,"name":"张三"}],"failure":[{"name":"张三"}]} */ @Test void importBatch1() { User user1 = new User("张三"); User user2 = new User("张三"); List<User> userList = Arrays.asList(user1, user2); JSONObject result = userService.importBatch(userList); log.info(result.toJSONString()); } /** * 模拟Redis中存在重复数据 * * 2020-05-19 15:18:40.700 ERROR 13352 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : Redis中name=张三的用户已存在 * 2020-05-19 15:18:40.708 WARN 13352 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : SqlSession [org.apache.ibatis.session.defaults.DefaultSqlSession@60251ddb] was not registered for synchronization because DataSource is not transactional * 2020-05-19 15:18:40.768 INFO 13352 --- [ main] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited * 2020-05-19 15:18:41.043 INFO 13352 --- [ main] c.i.d.m.m.s.impl.UserServiceImplTest : {"success":[{"id":2,"name":"李四"}],"failure":[{"name":"张三"}]} */ @Test void importBatch2() { User user1 = new User("张三"); User user2 = new User("李四"); List<User> userList = Arrays.asList(user1, user2); JSONObject result = userService.importBatch(userList); log.info(result.toJSONString()); } /** * 手动在MySQL中添加1条数据, 模拟MySQL中存在重复数据 * * 2020-05-19 15:19:22.337 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : Redis中name=张三的用户已存在 * 2020-05-19 15:19:22.339 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : Redis中name=李四的用户已存在 * 2020-05-19 15:19:22.347 WARN 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : SqlSession [org.apache.ibatis.session.defaults.DefaultSqlSession@69fe8c75] was not registered for synchronization because DataSource is not transactional * 2020-05-19 15:19:22.405 INFO 14128 --- [ main] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited * 2020-05-19 15:19:22.609 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : MySQL写入冲突:cn.idea360.demo.modules.mp.mapper.UserMapper.insert (batch index #1) failed. Cause: java.sql.BatchUpdateException: Duplicate entry '王五' for key 'name' * ; Duplicate entry '王五' for key 'name'; nested exception is java.sql.BatchUpdateException: Duplicate entry '王五' for key 'name' * 2020-05-19 15:19:22.609 ERROR 14128 --- [ main] c.i.d.m.mp.service.impl.UserServiceImpl : MySQL中name=王五的用户已存在 * 2020-05-19 15:19:22.697 INFO 14128 --- [ main] c.i.d.m.m.s.impl.UserServiceImplTest : {"success":[],"failure":[{"name":"张三"},{"name":"李四"},{"name":"王五"}]} */ @Test void importBatch3() { User user1 = new User("张三"); User user2 = new User("李四"); User user3 = new User("王五"); List<User> userList = Arrays.asList(user1, user2, user3); JSONObject result = userService.importBatch(userList); log.info(result.toJSONString()); } }

最后

本文到此结束,感谢阅读。如果您觉得不错,转发 关注一波呗!

,